{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "outputs": [],
   "source": [
    "# Load only the movieId and tag columns, selected by name rather than by position,\n",
    "# and drop rows that contain missing values.\n",
    "_tags = pd.read_csv(\"data/tags.csv\", usecols=[\"movieId\", \"tag\"]).dropna()"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "outputs": [],
   "source": [
    "# Collect all tag strings of each movie into one list per movieId.\n",
    "tags = _tags.groupby(by=\"movieId\").aggregate(list)"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "outputs": [
    {
     "data": {
      "text/plain": "                                                       tag\nmovieId                                                   \n1                                      [pixar, pixar, fun]\n2        [fantasy, magic board game, Robin Williams, game]\n3                                             [moldy, old]\n5                                      [pregnancy, remake]\n7                                                 [remake]\n...                                                    ...\n183611                     [Comedy, funny, Rachel McAdams]\n184471   [adventure, Alicia Vikander, video game adapta...\n187593               [Josh Brolin, Ryan Reynolds, sarcasm]\n187595                          [Emilia Clarke, star wars]\n193565                  [anime, comedy, gintama, remaster]\n\n[1572 rows x 1 columns]",
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>tag</th>\n    </tr>\n    <tr>\n      <th>movieId</th>\n      <th></th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>1</th>\n      <td>[pixar, pixar, fun]</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>[fantasy, magic board game, Robin Williams, game]</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>[moldy, old]</td>\n    </tr>\n    <tr>\n      <th>5</th>\n      <td>[pregnancy, remake]</td>\n    </tr>\n    <tr>\n      <th>7</th>\n      <td>[remake]</td>\n    </tr>\n    <tr>\n      <th>...</th>\n      <td>...</td>\n    </tr>\n    <tr>\n      <th>183611</th>\n      <td>[Comedy, funny, Rachel McAdams]</td>\n    </tr>\n    <tr>\n      <th>184471</th>\n      <td>[adventure, Alicia Vikander, video game adapta...</td>\n    </tr>\n    <tr>\n      <th>187593</th>\n      <td>[Josh Brolin, Ryan Reynolds, sarcasm]</td>\n    </tr>\n    <tr>\n      <th>187595</th>\n      <td>[Emilia Clarke, star wars]</td>\n    </tr>\n    <tr>\n      <th>193565</th>\n      <td>[anime, comedy, gintama, remaster]</td>\n    </tr>\n  </tbody>\n</table>\n<p>1572 rows × 1 columns</p>\n</div>"
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Preview the per-movie tag lists.\n",
    "tags"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "outputs": [],
   "source": [
    "# Load the movie list, using movieId as the index.\n",
    "movies = pd.read_csv(\n",
    "    \"data/movies.csv\",\n",
    "    index_col=\"movieId\",\n",
    ")"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "outputs": [
    {
     "data": {
      "text/plain": "                                             title  \\\nmovieId                                              \n1                                 Toy Story (1995)   \n2                                   Jumanji (1995)   \n3                          Grumpier Old Men (1995)   \n4                         Waiting to Exhale (1995)   \n5               Father of the Bride Part II (1995)   \n...                                            ...   \n193581   Black Butler: Book of the Atlantic (2017)   \n193583                No Game No Life: Zero (2017)   \n193585                                Flint (2017)   \n193587         Bungo Stray Dogs: Dead Apple (2018)   \n193609         Andrew Dice Clay: Dice Rules (1991)   \n\n                                              genres  \nmovieId                                               \n1        Adventure|Animation|Children|Comedy|Fantasy  \n2                         Adventure|Children|Fantasy  \n3                                     Comedy|Romance  \n4                               Comedy|Drama|Romance  \n5                                             Comedy  \n...                                              ...  \n193581               Action|Animation|Comedy|Fantasy  \n193583                      Animation|Comedy|Fantasy  \n193585                                         Drama  \n193587                              Action|Animation  \n193609                                        Comedy  \n\n[9742 rows x 2 columns]",
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>title</th>\n      <th>genres</th>\n    </tr>\n    <tr>\n      <th>movieId</th>\n      <th></th>\n      <th></th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>1</th>\n      <td>Toy Story (1995)</td>\n      <td>Adventure|Animation|Children|Comedy|Fantasy</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>Jumanji (1995)</td>\n      <td>Adventure|Children|Fantasy</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>Grumpier Old Men (1995)</td>\n      <td>Comedy|Romance</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>Waiting to Exhale (1995)</td>\n      <td>Comedy|Drama|Romance</td>\n    </tr>\n    <tr>\n      <th>5</th>\n      <td>Father of the Bride Part II (1995)</td>\n      <td>Comedy</td>\n    </tr>\n    <tr>\n      <th>...</th>\n      <td>...</td>\n      <td>...</td>\n    </tr>\n    <tr>\n      <th>193581</th>\n      <td>Black Butler: Book of the Atlantic (2017)</td>\n      <td>Action|Animation|Comedy|Fantasy</td>\n    </tr>\n    <tr>\n      <th>193583</th>\n      <td>No Game No Life: Zero (2017)</td>\n      <td>Animation|Comedy|Fantasy</td>\n    </tr>\n    <tr>\n      <th>193585</th>\n      <td>Flint (2017)</td>\n      <td>Drama</td>\n    </tr>\n    <tr>\n      <th>193587</th>\n      <td>Bungo Stray Dogs: Dead Apple (2018)</td>\n      <td>Action|Animation</td>\n    </tr>\n    <tr>\n      <th>193609</th>\n      <td>Andrew Dice Clay: Dice Rules (1991)</td>\n      <td>Comedy</td>\n    </tr>\n  </tbody>\n</table>\n<p>9742 rows × 2 columns</p>\n</div>"
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "movies  # the genres column still needs to be processed"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "outputs": [],
   "source": [
    "# Split the pipe-delimited genres string into a list of genre names.\n",
    "# Only str values are split, so re-running this cell (when the column already\n",
    "# holds lists) or meeting a missing value no longer raises AttributeError.\n",
    "movies[\"genres\"] = movies[\"genres\"].apply(\n",
    "    lambda genres: genres.split(\"|\") if isinstance(genres, str) else genres\n",
    ")"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "outputs": [
    {
     "data": {
      "text/plain": "                                             title  \\\nmovieId                                              \n1                                 Toy Story (1995)   \n2                                   Jumanji (1995)   \n3                          Grumpier Old Men (1995)   \n4                         Waiting to Exhale (1995)   \n5               Father of the Bride Part II (1995)   \n...                                            ...   \n193581   Black Butler: Book of the Atlantic (2017)   \n193583                No Game No Life: Zero (2017)   \n193585                                Flint (2017)   \n193587         Bungo Stray Dogs: Dead Apple (2018)   \n193609         Andrew Dice Clay: Dice Rules (1991)   \n\n                                                    genres  \nmovieId                                                     \n1        [Adventure, Animation, Children, Comedy, Fantasy]  \n2                           [Adventure, Children, Fantasy]  \n3                                        [Comedy, Romance]  \n4                                 [Comedy, Drama, Romance]  \n5                                                 [Comedy]  \n...                                                    ...  \n193581                [Action, Animation, Comedy, Fantasy]  \n193583                        [Animation, Comedy, Fantasy]  \n193585                                             [Drama]  \n193587                                 [Action, Animation]  \n193609                                            [Comedy]  \n\n[9742 rows x 2 columns]",
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>title</th>\n      <th>genres</th>\n    </tr>\n    <tr>\n      <th>movieId</th>\n      <th></th>\n      <th></th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>1</th>\n      <td>Toy Story (1995)</td>\n      <td>[Adventure, Animation, Children, Comedy, Fantasy]</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>Jumanji (1995)</td>\n      <td>[Adventure, Children, Fantasy]</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>Grumpier Old Men (1995)</td>\n      <td>[Comedy, Romance]</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>Waiting to Exhale (1995)</td>\n      <td>[Comedy, Drama, Romance]</td>\n    </tr>\n    <tr>\n      <th>5</th>\n      <td>Father of the Bride Part II (1995)</td>\n      <td>[Comedy]</td>\n    </tr>\n    <tr>\n      <th>...</th>\n      <td>...</td>\n      <td>...</td>\n    </tr>\n    <tr>\n      <th>193581</th>\n      <td>Black Butler: Book of the Atlantic (2017)</td>\n      <td>[Action, Animation, Comedy, Fantasy]</td>\n    </tr>\n    <tr>\n      <th>193583</th>\n      <td>No Game No Life: Zero (2017)</td>\n      <td>[Animation, Comedy, Fantasy]</td>\n    </tr>\n    <tr>\n      <th>193585</th>\n      <td>Flint (2017)</td>\n      <td>[Drama]</td>\n    </tr>\n    <tr>\n      <th>193587</th>\n      <td>Bungo Stray Dogs: Dead Apple (2018)</td>\n      <td>[Action, Animation]</td>\n    </tr>\n    <tr>\n      <th>193609</th>\n      <td>Andrew Dice Clay: Dice Rules (1991)</td>\n      <td>[Comedy]</td>\n    </tr>\n  </tbody>\n</table>\n<p>9742 rows × 2 columns</p>\n</div>"
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Preview movies with the genres column now holding lists.\n",
    "movies"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "outputs": [],
   "source": [
    "movies_index = set(movies.index) & set(tags.index)# movieIds present in both movies and tags"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "outputs": [
    {
     "data": {
      "text/plain": "                                                       tag\nmovieId                                                   \n1                                      [pixar, pixar, fun]\n122882     [beautiful, cinematography, visually appealing]\n2        [fantasy, magic board game, Robin Williams, game]\n3                                             [moldy, old]\n5                                      [pregnancy, remake]\n...                                                    ...\n106489             [adventure, fantasy, Tolkien, too long]\n40955                                       [transvestite]\n8188                          [Not available from Netflix]\n8190                                    [In Netflix queue]\n8191                                         [Anne Boleyn]\n\n[1572 rows x 1 columns]",
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>tag</th>\n    </tr>\n    <tr>\n      <th>movieId</th>\n      <th></th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>1</th>\n      <td>[pixar, pixar, fun]</td>\n    </tr>\n    <tr>\n      <th>122882</th>\n      <td>[beautiful, cinematography, visually appealing]</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>[fantasy, magic board game, Robin Williams, game]</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>[moldy, old]</td>\n    </tr>\n    <tr>\n      <th>5</th>\n      <td>[pregnancy, remake]</td>\n    </tr>\n    <tr>\n      <th>...</th>\n      <td>...</td>\n    </tr>\n    <tr>\n      <th>106489</th>\n      <td>[adventure, fantasy, Tolkien, too long]</td>\n    </tr>\n    <tr>\n      <th>40955</th>\n      <td>[transvestite]</td>\n    </tr>\n    <tr>\n      <th>8188</th>\n      <td>[Not available from Netflix]</td>\n    </tr>\n    <tr>\n      <th>8190</th>\n      <td>[In Netflix queue]</td>\n    </tr>\n    <tr>\n      <th>8191</th>\n      <td>[Anne Boleyn]</td>\n    </tr>\n  </tbody>\n</table>\n<p>1572 rows × 1 columns</p>\n</div>"
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Select the tag rows for the shared movieIds. Sorting the ids gives a stable,\n",
    "# ascending row order instead of the arbitrary iteration order of a set.\n",
    "new_tags = tags.loc[sorted(movies_index)]\n",
    "new_tags"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "outputs": [
    {
     "data": {
      "text/plain": "                                             title  \\\nmovieId                                              \n1                                 Toy Story (1995)   \n2                                   Jumanji (1995)   \n3                          Grumpier Old Men (1995)   \n4                         Waiting to Exhale (1995)   \n5               Father of the Bride Part II (1995)   \n...                                            ...   \n193581   Black Butler: Book of the Atlantic (2017)   \n193583                No Game No Life: Zero (2017)   \n193585                                Flint (2017)   \n193587         Bungo Stray Dogs: Dead Apple (2018)   \n193609         Andrew Dice Clay: Dice Rules (1991)   \n\n                                                    genres  \\\nmovieId                                                      \n1        [Adventure, Animation, Children, Comedy, Fantasy]   \n2                           [Adventure, Children, Fantasy]   \n3                                        [Comedy, Romance]   \n4                                 [Comedy, Drama, Romance]   \n5                                                 [Comedy]   \n...                                                    ...   \n193581                [Action, Animation, Comedy, Fantasy]   \n193583                        [Animation, Comedy, Fantasy]   \n193585                                             [Drama]   \n193587                                 [Action, Animation]   \n193609                                            [Comedy]   \n\n                                                       tag  \nmovieId                                                     \n1                                      [pixar, pixar, fun]  \n2        [fantasy, magic board game, Robin Williams, game]  \n3                                             [moldy, old]  \n4                                                      NaN  \n5                                      [pregnancy, remake]  \n...  
                                                  ...  \n193581                                                 NaN  \n193583                                                 NaN  \n193585                                                 NaN  \n193587                                                 NaN  \n193609                                                 NaN  \n\n[9742 rows x 3 columns]",
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>title</th>\n      <th>genres</th>\n      <th>tag</th>\n    </tr>\n    <tr>\n      <th>movieId</th>\n      <th></th>\n      <th></th>\n      <th></th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>1</th>\n      <td>Toy Story (1995)</td>\n      <td>[Adventure, Animation, Children, Comedy, Fantasy]</td>\n      <td>[pixar, pixar, fun]</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>Jumanji (1995)</td>\n      <td>[Adventure, Children, Fantasy]</td>\n      <td>[fantasy, magic board game, Robin Williams, game]</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>Grumpier Old Men (1995)</td>\n      <td>[Comedy, Romance]</td>\n      <td>[moldy, old]</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>Waiting to Exhale (1995)</td>\n      <td>[Comedy, Drama, Romance]</td>\n      <td>NaN</td>\n    </tr>\n    <tr>\n      <th>5</th>\n      <td>Father of the Bride Part II (1995)</td>\n      <td>[Comedy]</td>\n      <td>[pregnancy, remake]</td>\n    </tr>\n    <tr>\n      <th>...</th>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n    </tr>\n    <tr>\n      <th>193581</th>\n      <td>Black Butler: Book of the Atlantic (2017)</td>\n      <td>[Action, Animation, Comedy, Fantasy]</td>\n      <td>NaN</td>\n    </tr>\n    <tr>\n      <th>193583</th>\n      <td>No Game No Life: Zero (2017)</td>\n      <td>[Animation, Comedy, Fantasy]</td>\n      <td>NaN</td>\n    </tr>\n    <tr>\n      <th>193585</th>\n      <td>Flint (2017)</td>\n      <td>[Drama]</td>\n      <td>NaN</td>\n    </tr>\n    <tr>\n      <th>193587</th>\n      <td>Bungo Stray Dogs: Dead Apple (2018)</td>\n    
  <td>[Action, Animation]</td>\n      <td>NaN</td>\n    </tr>\n    <tr>\n      <th>193609</th>\n      <td>Andrew Dice Clay: Dice Rules (1991)</td>\n      <td>[Comedy]</td>\n      <td>NaN</td>\n    </tr>\n  </tbody>\n</table>\n<p>9742 rows × 3 columns</p>\n</div>"
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Left-join the tag lists onto the movie table by index (movieId);\n",
    "# movies without any tag rows get NaN in the tag column.\n",
    "ret = movies.join(new_tags, how=\"left\")\n",
    "ret"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "outputs": [],
   "source": [
    "# Build (movieId, title, genres, tags) records, where tags is genres followed by the\n",
    "# user tags. Rows whose tag cell is not a list (NaN after the join) get an empty list.\n",
    "# NOTE(review): untagged movies therefore lose their genres in `tags` - confirm this is intended.\n",
    "# A list is built instead of a lazy map, so the cell that consumes `temp` can be re-run\n",
    "# on its own without receiving an already-exhausted iterator.\n",
    "temp = [\n",
    "    (row[0], row[1], row[2], row[2] + row[3] if isinstance(row[3], list) else [])\n",
    "    for row in ret.itertuples()\n",
    "]"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "outputs": [
    {
     "data": {
      "text/plain": "                                             title  \\\nmovieId                                              \n1                                 Toy Story (1995)   \n2                                   Jumanji (1995)   \n3                          Grumpier Old Men (1995)   \n4                         Waiting to Exhale (1995)   \n5               Father of the Bride Part II (1995)   \n...                                            ...   \n193581   Black Butler: Book of the Atlantic (2017)   \n193583                No Game No Life: Zero (2017)   \n193585                                Flint (2017)   \n193587         Bungo Stray Dogs: Dead Apple (2018)   \n193609         Andrew Dice Clay: Dice Rules (1991)   \n\n                                                    genres  \\\nmovieId                                                      \n1        [Adventure, Animation, Children, Comedy, Fantasy]   \n2                           [Adventure, Children, Fantasy]   \n3                                        [Comedy, Romance]   \n4                                 [Comedy, Drama, Romance]   \n5                                                 [Comedy]   \n...                                                    ...   \n193581                [Action, Animation, Comedy, Fantasy]   \n193583                        [Animation, Comedy, Fantasy]   \n193585                                             [Drama]   \n193587                                 [Action, Animation]   \n193609                                            [Comedy]   \n\n                                                      tags  \nmovieId                                                     \n1        [Adventure, Animation, Children, Comedy, Fanta...  \n2        [Adventure, Children, Fantasy, fantasy, magic ...  \n3                            [Comedy, Romance, moldy, old]  \n4                                                       []  \n5                              [Comedy, pregnancy, remake]  \n...  
                                                  ...  \n193581                                                  []  \n193583                                                  []  \n193585                                                  []  \n193587                                                  []  \n193609                                                  []  \n\n[9742 rows x 3 columns]",
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>title</th>\n      <th>genres</th>\n      <th>tags</th>\n    </tr>\n    <tr>\n      <th>movieId</th>\n      <th></th>\n      <th></th>\n      <th></th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>1</th>\n      <td>Toy Story (1995)</td>\n      <td>[Adventure, Animation, Children, Comedy, Fantasy]</td>\n      <td>[Adventure, Animation, Children, Comedy, Fanta...</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>Jumanji (1995)</td>\n      <td>[Adventure, Children, Fantasy]</td>\n      <td>[Adventure, Children, Fantasy, fantasy, magic ...</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>Grumpier Old Men (1995)</td>\n      <td>[Comedy, Romance]</td>\n      <td>[Comedy, Romance, moldy, old]</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>Waiting to Exhale (1995)</td>\n      <td>[Comedy, Drama, Romance]</td>\n      <td>[]</td>\n    </tr>\n    <tr>\n      <th>5</th>\n      <td>Father of the Bride Part II (1995)</td>\n      <td>[Comedy]</td>\n      <td>[Comedy, pregnancy, remake]</td>\n    </tr>\n    <tr>\n      <th>...</th>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n    </tr>\n    <tr>\n      <th>193581</th>\n      <td>Black Butler: Book of the Atlantic (2017)</td>\n      <td>[Action, Animation, Comedy, Fantasy]</td>\n      <td>[]</td>\n    </tr>\n    <tr>\n      <th>193583</th>\n      <td>No Game No Life: Zero (2017)</td>\n      <td>[Animation, Comedy, Fantasy]</td>\n      <td>[]</td>\n    </tr>\n    <tr>\n      <th>193585</th>\n      <td>Flint (2017)</td>\n      <td>[Drama]</td>\n      <td>[]</td>\n    </tr>\n    <tr>\n      <th>193587</th>\n    
  <td>Bungo Stray Dogs: Dead Apple (2018)</td>\n      <td>[Action, Animation]</td>\n      <td>[]</td>\n    </tr>\n    <tr>\n      <th>193609</th>\n      <td>Andrew Dice Clay: Dice Rules (1991)</td>\n      <td>[Comedy]</td>\n      <td>[]</td>\n    </tr>\n  </tbody>\n</table>\n<p>9742 rows × 3 columns</p>\n</div>"
     },
     "execution_count": 40,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Assemble the records into a frame keyed by movieId and display it.\n",
    "movie_dataset = pd.DataFrame(\n",
    "    temp, columns=[\"movieId\", \"title\", \"genres\", \"tags\"]\n",
    ").set_index(\"movieId\")\n",
    "movie_dataset"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "outputs": [],
   "source": [
    "# Pull the combined tag lists out as an array, one list per movie.\n",
    "dataset = movie_dataset[\"tags\"].to_numpy()"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 53,
   "outputs": [
    {
     "data": {
      "text/plain": "array([list(['Adventure', 'Animation', 'Children', 'Comedy', 'Fantasy', 'pixar', 'pixar', 'fun']),\n       list(['Adventure', 'Children', 'Fantasy', 'fantasy', 'magic board game', 'Robin Williams', 'game']),\n       list(['Comedy', 'Romance', 'moldy', 'old']), ..., list([]),\n       list([]), list([])], dtype=object)"
     },
     "execution_count": 53,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Inspect the array of per-movie tag lists.\n",
    "dataset"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "outputs": [],
   "source": [
    "from gensim.corpora import Dictionary\n",
    "from gensim.models import TfidfModel"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "outputs": [],
   "source": [
    "# Create the gensim Dictionary object from the per-movie tag lists.\n",
    "dct = Dictionary(documents=dataset)"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 52,
   "outputs": [
    {
     "data": {
      "text/plain": "{0: 'Adventure',\n 1: 'Animation',\n 2: 'Children',\n 3: 'Comedy',\n 4: 'Fantasy',\n 5: 'fun',\n 6: 'pixar',\n 7: 'Robin Williams',\n 8: 'fantasy',\n 9: 'game',\n 10: 'magic board game',\n 11: 'Romance',\n 12: 'moldy',\n 13: 'old',\n 14: 'pregnancy',\n 15: 'remake',\n 16: 'Drama',\n 17: 'politics',\n 18: 'president',\n 19: 'Crime',\n 20: 'Mafia',\n 21: 'Jane Austen',\n 22: 'Hollywood',\n 23: 'Thriller',\n 24: 'Horror',\n 25: 'Mystery',\n 26: 'serial killer',\n 27: 'alcoholism',\n 28: 'Shakespeare',\n 29: 'In Netflix queue',\n 30: 'Sci-Fi',\n 31: 'kidnapping',\n 32: 'high school',\n 33: 'teacher',\n 34: 'Brad Pitt',\n 35: 'Bruce Willis',\n 36: 'Post apocalyptic',\n 37: 'mindfuck',\n 38: 'post-apocalyptic',\n 39: 'time travel',\n 40: 'twist ending',\n 41: 'Animal movie',\n 42: 'pigs',\n 43: 'villain nonexistent or not needed for good story',\n 44: 'Nun',\n 45: 'death penalty',\n 46: 'twins',\n 47: 'Emma',\n 48: 'Paul Rudd',\n 49: 'chick flick',\n 50: 'funny',\n 51: 'quotable',\n 52: 'seen more than once',\n 53: 'South Africa',\n 54: 'War',\n 55: 'England',\n 56: 'Journalism',\n 57: 'wedding',\n 58: 'mystery',\n 59: 'heist',\n 60: 'suspense',\n 61: 'thriller',\n 62: 'tricky',\n 63: 'adoption',\n 64: 'prostitution',\n 65: 'writing',\n 66: 'music',\n 67: 'Jekyll and Hyde',\n 68: 'theater',\n 69: 'crime',\n 70: 'off-beat comedy',\n 71: 'quirky',\n 72: 'Adam Sandler',\n 73: 'golf',\n 74: 'test tag',\n 75: 'Musical',\n 76: 'muppets',\n 77: 'Action',\n 78: 'Medieval',\n 79: 'Oscar (Best Cinematography)',\n 80: 'Scotland',\n 81: 'beautiful scenery',\n 82: 'epic',\n 83: 'historical',\n 84: 'inspirational',\n 85: 'mel gibson',\n 86: 'revenge',\n 87: 'sword fight',\n 88: 'assassination',\n 89: 'Documentary',\n 90: 'Holocaust',\n 91: 'dating',\n 92: 'journalism',\n 93: 'Leonardo DiCaprio',\n 94: 'Mark Wahlberg',\n 95: 'addiction',\n 96: 'heroin',\n 97: 'IMAX',\n 98: 'NASA',\n 99: 'moon',\n 100: 'space',\n 101: 'superhero',\n 102: 'Michael Crichton',\n 103: 
'submarine',\n 104: 'New York City',\n 105: 'Nudity (Full Frontal)',\n 106: 'controversial',\n 107: 'computers',\n 108: 'Made me cry',\n 109: 'generation X',\n 110: 'school',\n 111: 'stop looking at me swan',\n 112: 'Ireland',\n 113: 'cynical',\n 114: 'hilarious',\n 115: 'independent film',\n 116: 'witty',\n 117: 'mental illness',\n 118: 'psychology',\n 119: 'Stephen King',\n 120: 'movie business',\n 121: 'France',\n 122: 'basketball',\n 123: 'infertility',\n 124: 'Australia',\n 125: 'Sinbad',\n 126: 'bad',\n 127: 'Beethoven',\n 128: 'Einstein',\n 129: 'court',\n 130: 'EPIC',\n 131: 'Nerd',\n 132: 'ROBOTS AND ANDROIDS',\n 133: 'Star Wars',\n 134: 'action',\n 135: 'classic',\n 136: 'classic sci-fi',\n 137: 'darth vader',\n 138: 'engrossing adventure',\n 139: 'great soundtrack',\n 140: 'luke skywalker',\n 141: 'oldie but goodie',\n 142: 'sci-fi',\n 143: 'space action',\n 144: 'space adventure',\n 145: 'space epic',\n 146: 'space opera',\n 147: 'Louisa May Alcott',\n 148: 'Girl Power',\n 149: 'India',\n 150: 'gothic',\n 151: 'Christmas',\n 152: 'prison',\n 153: 'disability',\n 154: 'brutality',\n 155: 'dark comedy',\n 156: 'psychedelic',\n 157: 'satire',\n 158: 'stylized',\n 159: 'French',\n 160: 'Gary Oldman',\n 161: 'Guns',\n 162: 'Jean Reno',\n 163: 'Lolita theme',\n 164: 'Luc Besson',\n 165: 'Natalie Portman',\n 166: 'assassin',\n 167: 'assassins',\n 168: 'awkward romance',\n 169: 'corruption',\n 170: 'disturbing',\n 171: 'drama',\n 172: 'friendship',\n 173: 'great acting',\n 174: 'hit men',\n 175: 'hitman',\n 176: 'humorous',\n 177: 'imdb top 250',\n 178: 'loneliness',\n 179: 'love story',\n 180: 'organized crime',\n 181: 'police',\n 182: 'police corruption',\n 183: 'sniper',\n 184: 'tense',\n 185: 'touching',\n 186: 'unique',\n 187: '1990s',\n 188: 'AWESOME',\n 189: 'Black comedy',\n 190: 'Harvey Keitel',\n 191: 'Highly quotable',\n 192: 'John Travolta',\n 193: \"Palme d'Or\",\n 194: 'Quentin Tarantino',\n 195: 'Quotable',\n 196: 'Roger Avary',\n 197: 'Samuel L. 
Jackson',\n 198: 'Steve Buscemi',\n 199: 'Tarantino',\n 200: 'Uma Thurman',\n 201: 'achronological',\n 202: 'action packed',\n 203: 'aggressive',\n 204: 'amazing',\n 205: 'amazing dialogues',\n 206: 'anthology',\n 207: 'atmospheric',\n 208: 'bad ass',\n 209: 'bad language',\n 210: 'bad-ass',\n 211: 'bible',\n 212: 'biblical references',\n 213: 'big boys with guns',\n 214: 'big name actors',\n 215: 'black humor',\n 216: 'black humour',\n 217: 'blood',\n 218: 'blood splatters',\n 219: 'bloody',\n 220: 'bruce willis',\n 221: 'casual violence',\n 222: 'character development',\n 223: 'characters',\n 224: 'classic movie',\n 225: 'coke',\n 226: 'comedy',\n 227: 'conversation',\n 228: 'cool',\n 229: 'cool style',\n 230: 'crime scene scrubbing',\n 231: 'cult',\n 232: 'cult classic',\n 233: 'cult film',\n 234: 'dance',\n 235: 'dancing',\n 236: 'dark',\n 237: 'dark humor',\n 238: 'dialogue',\n 239: 'different',\n 240: 'diner',\n 241: 'disjointed timeline',\n 242: 'drug overdose',\n 243: 'drugs',\n 244: 'drugs & music',\n 245: 'ensemble cast',\n 246: 'entertaining',\n 247: 'entirely dialogue',\n 248: 'episodic',\n 249: 'exciting',\n 250: 'fast paced',\n 251: 'fast-paced',\n 252: 'film noir',\n 253: 'film-noir',\n 254: 'foul language',\n 255: 'gangster',\n 256: 'gangsters',\n 257: 'genius',\n 258: 'golden watch',\n 259: 'good dialogue',\n 260: 'good music',\n 261: 'gore',\n 262: 'great dialogue',\n 263: 'gritty',\n 264: 'guns',\n 265: 'homosexuality',\n 266: 'humour',\n 267: 'iconic',\n 268: 'innovative',\n 269: 'intellectual',\n 270: 'intelligent',\n 271: 'intense',\n 272: 'interesting',\n 273: 'intertwining storylines',\n 274: 'interwoven storylines',\n 275: 'ironic',\n 276: 'irony',\n 277: 'killer-as-protagonist',\n 278: 'los angeles',\n 279: 'masterpiece',\n 280: 'meaningless violence',\n 281: 'milkshake',\n 282: 'mobster',\n 283: 'mobsters',\n 284: 'monologue',\n 285: 'motherfucker',\n 286: 'multiple stories',\n 287: 'multiple storylines',\n 288: 'neo-noir',\n 289: 
'noir',\n 290: 'non-linear',\n 291: 'non-linear timeline',\n 292: 'nonlinear',\n 293: 'nonlinear narrative',\n 294: 'nonlinear storyline',\n 295: 'nonlinear timeline',\n 296: 'notable soundtrack',\n 297: 'offensive',\n 298: 'organised crime',\n 299: 'original',\n 300: 'original plot',\n 301: 'out of order',\n 302: 'parody',\n 303: 'philosophical',\n 304: 'pop culture references',\n 305: 'psychological',\n 306: 'pulp',\n 307: 'r:disturbing violent content including rape',\n 308: 'r:disturbing violent images',\n 309: 'r:graphic sexuality',\n 310: 'r:some violence',\n 311: 'r:strong bloody violence',\n 312: 'r:strong language',\n 313: 'r:sustained strong stylized violence',\n 314: 'r:violence',\n 315: 'random',\n 316: 'rape',\n 317: 'retro',\n 318: 'royal with cheese',\n 319: 'sarcasm',\n 320: 'sexy',\n 321: 'smart writing',\n 322: 'sophisticated',\n 323: 'soundtrack',\n 324: 'splatter',\n 325: 'storytelling',\n 326: 'stylish',\n 327: 'thought-provoking',\n 328: 'travolta',\n 329: 'unpredictable',\n 330: 'unusual',\n 331: 'very funny',\n 332: 'violence',\n 333: 'violent',\n 334: 'TV',\n 335: 'Death',\n 336: 'marriage',\n 337: 'Morgan Freeman',\n 338: 'wrongful imprisonment',\n 339: 'Enterprise',\n 340: 'coma',\n 341: 'weddings',\n 342: 'cross dressing',\n 343: 'men in drag',\n 344: 'remade',\n 345: 'Tom Clancy',\n 346: 'John Grisham',\n 347: 'interracial romance',\n 348: 'Vietnam',\n 349: 'bittersweet',\n 350: 'bubba gump shrimp',\n 351: 'emotional',\n 352: 'heartwarming',\n 353: 'lieutenant dan',\n 354: 'shrimp',\n 355: 'stupid is as stupid does',\n 356: 'gambling',\n 357: 'Disney',\n 358: 'Disney animated feature',\n 359: 'Oscar (Best Music - Original Score)',\n 360: 'bus',\n 361: 'spies',\n 362: 'Christina Ricci',\n 363: 'Christopher Lloyd',\n 364: 'black comedy',\n 365: 'family',\n 366: 'Edith Wharton',\n 367: 'horses',\n 368: 'Oscar (Best Actress)',\n 369: 'Al Pacino',\n 370: 'mafia',\n 371: 'President',\n 372: 'based on a TV show',\n 373: 'hula hoop',\n 374: 
'biopic',\n 375: 'Dinosaur',\n 376: 'Japan',\n 377: 'sexuality',\n 378: 'divorce',\n 379: 'AIDs',\n 380: 'radio',\n 381: 'show business',\n 382: 'Butler',\n 383: 'Housekeeper',\n 384: 'military',\n 385: 'racism',\n 386: 'football',\n 387: 'based on a true story',\n 388: 'biography',\n 389: 'holocaust',\n 390: 'moving',\n 391: 'chess',\n 392: 'C.S. Lewis',\n 393: 'large cast',\n 394: 'race',\n 395: 'Empire State Building',\n 396: 'Philip K. Dick',\n 397: 'androids',\n 398: 'artificial intelligence',\n 399: 'cyberpunk',\n 400: 'dreamlike',\n 401: 'existentialism',\n 402: 'future',\n 403: 'robots',\n 404: 'beat poetry',\n 405: 'Halloween',\n 406: 'knights',\n 407: 'adolescence',\n 408: 'christmas',\n 409: 'overrated',\n 410: 'Arnold Schwarzenegger',\n 411: 'Scifi masterpiece',\n 412: 'Suspense',\n 413: 'apocalypse',\n 414: 'nuclear war',\n 415: 'American Indians',\n 416: 'Native Americans',\n 417: 'Western',\n 418: 'Hannibal Lector',\n 419: 'Coen Brothers',\n 420: 'KIDNAPPING',\n 421: 'edward norton',\n 422: 'priest',\n 423: 'babies',\n 424: 'Gulf War',\n 425: 'spoof',\n 426: 'Bugs Bunny',\n 427: 'Veterinarian',\n 428: 'Aardman',\n 429: 'Alcatraz',\n 430: 'Michael Bay',\n 431: 'terrorism',\n 432: 'Disaster',\n 433: 'aliens',\n 434: 'Atomic bomb',\n 435: 'Quirky',\n 436: 'Slim Pickens',\n 437: 'purity of essence',\n 438: 'based on a book',\n 439: 'drug abuse',\n 440: 'narrated',\n 441: 'adultery',\n 442: 'GIVE ME BACK MY SON!',\n 443: 'It was melodramatic and kind of dumb',\n 444: 'Mel Gibson',\n 445: 'seen at the cinema',\n 446: 'Capote',\n 447: 'Alfred Hitchcock',\n 448: 'Atmospheric',\n 449: 'James Stewart',\n 450: 'falling',\n 451: 'photographer',\n 452: 'photography',\n 453: 'voyeurism',\n 454: 'Screwball',\n 455: 'Brooch',\n 456: 'Mount Rushmore',\n 457: 'start of a beautiful friendship',\n 458: 'Film-Noir',\n 459: 'statue',\n 460: 'George Bernard Shaw',\n 461: 'rich guy - poor girl',\n 462: 'Italy',\n 463: 'royalty',\n 464: '1900s',\n 465: 'Dorothy',\n 466: 
'Toto',\n 467: 'Civil War',\n 468: 'television',\n 469: 'eerie',\n 470: 'movies',\n 471: 'Rosebud',\n 472: 'Arthur C. Clarke',\n 473: 'Dull',\n 474: 'Hal',\n 475: 'Oscar (Best Effects - Visual Effects)',\n 476: 'Stanley Kubrick',\n 477: 'apes',\n 478: 'cinematography',\n 479: 'computer',\n 480: 'confusing ending',\n 481: 'futuristic',\n 482: 'meditative',\n 483: 'relaxing',\n 484: 'revolutionary',\n 485: 'setting:space/space ship',\n 486: 'slow',\n 487: 'slow paced',\n 488: 'space travel',\n 489: 'spacecraft',\n 490: 'superb soundtrack',\n 491: 'surreal',\n 492: 'technology',\n 493: 'tedious',\n 494: 'visual',\n 495: 'visually appealing',\n 496: 'Mrs. DeWinter',\n 497: 'Europe',\n 498: 'war',\n 499: 'amnesia',\n 500: 'Cold War',\n 501: 'Russia',\n 502: 'swashbuckler',\n 503: 'ghosts',\n 504: 'Shangri-La',\n 505: 'Astaire and Rogers',\n 506: 'butler',\n 507: 'homeless',\n 508: 'screwball',\n 509: 'oil',\n 510: 'Nick and Nora Charles',\n 511: 'Politics',\n 512: 'leopard',\n 513: 'fugitive',\n 514: 'zombies',\n 515: 'missionary',\n 516: 'Tennessee Williams',\n 517: 'Hemingway',\n 518: 'food',\n 519: 'King Arthur',\n 520: 'nanny',\n 521: 'Rogers and Hammerstein',\n 522: 'Music',\n 523: 'Amazing Cinematography',\n 524: 'shakespeare',\n 525: 'updated classics',\n 526: 'anti-Semitism',\n 527: 'governess',\n 528: 'fish',\n 529: 'Bible',\n 530: 'religion',\n 531: '1920s',\n 532: '1960s',\n 533: 'Jim Morrison',\n 534: 'Navy',\n 535: 'predictable',\n 536: '1950s',\n 537: 'freedom of expression',\n 538: 'aging',\n 539: 'Clousseau',\n 540: 'Monty Python',\n 541: 'british comedy',\n 542: 'boxing',\n 543: 'jack nicholson',\n 544: 'George Lucas',\n 545: 'Harrison Ford',\n 546: 'I am your father',\n 547: 'sequel',\n 548: 'Inigo Montoya',\n 549: 'six-fingered man',\n 550: 'Steven Spielberg',\n 551: 'adventure',\n 552: 'archaeology',\n 553: 'ark of the covenant',\n 554: 'indiana jones',\n 555: 'treasure hunt',\n 556: 'SPACE TRAVEL',\n 557: 'horror',\n 558: 'space craft',\n 559: 
'spaghetti western',\n 560: 'Motivational',\n 561: 'claustrophobic',\n 562: 'confrontational',\n 563: 'earnest',\n 564: 'great screenplay',\n 565: 'Middle East',\n 566: 'brainwashing',\n 567: 'Harper Lee',\n 568: 'Venice',\n 569: 'ferris wheel',\n 570: 'zither',\n 571: 'samurai',\n 572: 'Norman Bates',\n 573: 'black and white',\n 574: 'suspenseful',\n 575: 'Saturday Night Live',\n 576: 'anti-war',\n 577: 'Mozart',\n 578: 'Salieri',\n 579: 'New York',\n 580: 'The Entertainer',\n 581: 'May-December romance',\n 582: 'cerebral',\n 583: 'death',\n 584: 'reflective',\n 585: 'special effects',\n 586: 'Shakespeare sort of',\n 587: 'High School',\n 588: 'highschool',\n 589: 'Simon and Garfunkel',\n 590: 'POW',\n 591: 'incest',\n 592: 'Cold',\n 593: 'skiing',\n 594: 'chilly',\n 595: 'creepy',\n 596: 'menacing',\n 597: 'oninous',\n 598: 'murder',\n 599: 'World War II',\n 600: 'animation',\n 601: 'anime',\n 602: 'visually stunning',\n 603: 'Nazis',\n 604: 'gunfight',\n 605: 'Hammett',\n 606: 'heavy metal',\n 607: 'mockumentary',\n 608: 'Holy Grail',\n 609: 'E. M. 
Forster',\n 610: 'Cambodia',\n 611: 'baseball',\n 612: 'birds',\n 613: 'Juliette Lewis',\n 614: 'Martin Scorsese',\n 615: 'Robert De Niro',\n 616: 'lawyer',\n 617: 'prom',\n 618: 'vampires',\n 619: 'demons',\n 620: 'personals ads',\n 621: 'Borg',\n 622: 'ex-con',\n 623: 'Arthur Miller',\n 624: 'dogs',\n 625: 'Klingons',\n 626: 'Captain Kirk',\n 627: 'whales',\n 628: 'Shark',\n 629: 'shark',\n 630: 'sports',\n 631: 'spying',\n 632: 'slasher',\n 633: 'Hawkeye',\n 634: 'James Fennimore Cooper',\n 635: 'Conan',\n 636: 'Andrew Lloyd Weber',\n 637: 'Not Seen',\n 638: 'jungle',\n 639: 'Tolstoy',\n 640: 'reunion',\n 641: 'ballroom dance',\n 642: 'dinosaurs',\n 643: 'romantic comedy',\n 644: 'transplants',\n 645: 'Police',\n 646: 'plot twist',\n 647: '1970s',\n 648: 'Queen Victoria',\n 649: 'lawyers',\n 650: 'Henry James',\n 651: 'Amish',\n 652: 'alternate universe',\n 653: 'Savannah',\n 654: 'Canada',\n 655: 'romance',\n 656: 'shipwreck',\n 657: 'Kevin Costner',\n 658: 'Cult classic',\n 659: 'Jeff Bridges',\n 660: 'John Goodman',\n 661: 'Julianne Moore',\n 662: 'Philip Seymour Hoffman',\n 663: 'bowling',\n 664: 'coen brothers',\n 665: 'deadpan',\n 666: 'marijuana',\n 667: 'ransom',\n 668: 'rug',\n 669: 'satirical',\n 670: 'Charles Dickens',\n 671: '1980s',\n 672: 'good',\n 673: 'No DVD at Netflix',\n 674: 'conspiracy',\n 675: 'avant-garde romantic comedy',\n 676: 'elegiac',\n 677: 'melancholy',\n 678: 'wry',\n 679: 'artsy',\n 680: 'enigmatic',\n 681: 'hallucinatory',\n 682: 'insanity',\n 683: 'mathematics',\n 684: 'paranoia',\n 685: 'paranoid',\n 686: 'Ben Stiller',\n 687: 'crude humor',\n 688: 'goofy',\n 689: 'ships',\n 690: 'Judaism',\n 691: 'Huey Long',\n 692: 'Robert Penn Warren',\n 693: 'coulda been a contender',\n 694: 'Gangs',\n 695: 'British',\n 696: 'Dickens',\n 697: 'Olympics',\n 698: 'Africa',\n 699: 'China',\n 700: 'autism',\n 701: 'rasicm',\n 702: 'Jason',\n 703: 'halloween',\n 704: 'California',\n 705: 'Mexico',\n 706: 'Katzanzakis',\n 707: 'Andy Garcia',\n 
708: 'Classic',\n 709: 'Francis Ford Coppola',\n 710: 'General Motors',\n 711: 'Michigan',\n 712: 'memory',\n 713: 'suburbia',\n 714: 'Ray Bradbury',\n 715: 'carnival',\n 716: 'mermaid',\n 717: 'weather forecaster',\n 718: 'S.E. Hinton',\n 719: 'Tolkein',\n 720: 'Family',\n 721: 'beautiful',\n 722: 'mice',\n 723: 'post-college',\n 724: 'scary',\n 725: 'Boston',\n 726: 'aquarium',\n 727: 'Capone',\n 728: 'survival',\n 729: 'small towns',\n 730: 'train',\n 731: 'In Your Eyes',\n 732: 'Lloyd Dobbler',\n 733: 'freaks',\n 734: 'SNL',\n 735: 'Broadway',\n 736: 'Heartwarming',\n 737: 'poignant',\n 738: 'sentimental',\n 739: 'tear jerker',\n 740: 'tearjerking',\n 741: 'Pixar',\n 742: 'South America',\n 743: 'Star Trek',\n 744: 'cameo:Whoopi Goldberg',\n 745: 'Moses',\n 746: 'martial arts',\n 747: 'e-mail',\n 748: 'doctors',\n 749: 'Hungary',\n 750: 'stapler',\n 751: 'workplace',\n 752: 'Nicolas Cage',\n 753: 'evil children',\n 754: 'Siam',\n 755: 'philosophy',\n 756: 'post apocalyptic',\n 757: 'Christopher Nolan',\n 758: 'Twist Ending',\n 759: 'directorial debut',\n 760: 'not linear',\n 761: 'southern US',\n 762: 'golfing',\n 763: 'virtual reality',\n 764: 'motherhood',\n 765: 'comic book',\n 766: 'prequel',\n 767: 'the Force',\n 768: 'alternate endings',\n 769: 'spiders',\n 770: 'Trey Parker',\n 771: 'adult humor',\n 772: 'free speech',\n 773: 'south park',\n 774: 'Chris Klein',\n 775: 'Jason Biggs',\n 776: 'Seann William Scott',\n 777: 'best comedy',\n 778: 'dumb',\n 779: 'not funny',\n 780: 'pizza beer',\n 781: 'teen',\n 782: 'claims to be true',\n 783: 'video',\n 784: 'Ghosts',\n 785: 'Rome',\n 786: 'crucifixion',\n 787: 'slavery',\n 788: 'gentle',\n 789: 'lyrical',\n 790: 'Missionary',\n 791: 'Priest',\n 792: 'nostalgia',\n 793: 'I see dead people',\n 794: 'aviation',\n 795: 'children',\n 796: 'Beatles',\n 797: 'river',\n 798: 'Academy award (Best Supporting Actress)',\n 799: 'Indonesia',\n 800: 'nightclub',\n 801: 'island',\n 802: 'Macaulay Culkin',\n 803: 'Chuck 
Palahniuk',\n 804: 'David Fincher',\n 805: 'Edward Norton',\n 806: 'Nudity (Topless)',\n 807: 'Palahnuik',\n 808: 'TERRORISM',\n 809: 'challenging',\n 810: 'clever',\n 811: 'complicated',\n 812: 'consumerism',\n 813: 'double life',\n 814: 'fighting',\n 815: 'helena bonham carter',\n 816: 'imaginary friend',\n 817: 'mind-blowing',\n 818: 'postmodern',\n 819: 'powerful ending',\n 820: 'psychological thriller',\n 821: 'schizophrenia',\n 822: 'social commentary',\n 823: 'societal criticism',\n 824: 'twist',\n 825: 'brothers',\n 826: 'lawn mower',\n 827: 'live action/animation',\n 828: 'Studio Ghibli',\n 829: 'fantasy world',\n 830: 'tobacco',\n 831: 'true story',\n 832: 'dance marathon',\n 833: 'Dan Aykroyd',\n 834: 'class',\n 835: 'camp',\n 836: 'Kevin Smith',\n 837: 'irreverent',\n 838: 'jay and silent bob',\n 839: 'Up series',\n 840: 'Ichabod Crane',\n 841: 'new york',\n 842: 'depression',\n 843: 'dust bowl',\n 844: 'Tom Hanks',\n 845: 'Graham Greene',\n 846: 'downbeat',\n 847: 'fatherhood',\n 848: 'black-and-white',\n 849: 'L.A.',\n 850: 'Andy Kaufman',\n 851: 'Jude Law',\n 852: 'obsession',\n 853: 'secrets',\n 854: 'Brittany Murphy',\n 855: 'Mental Hospital',\n 856: 'asylum',\n 857: 'winona ryder',\n 858: 'a dingo ate my baby',\n 859: 'nuns',\n 860: 'plastic surgery',\n 861: 'E.M. 
Forster',\n 862: 'gun fu',\n 863: 'heroic bloodshed',\n 864: 'crazy',\n 865: 'figure skating',\n 866: 'big top',\n 867: 'circus',\n 868: 'blind',\n 869: 'orphans',\n 870: 'subway',\n 871: 'Peace Corp',\n 872: 'scandal',\n 873: 'TOGA',\n 874: 'college',\n 875: 'great cinematography',\n 876: 'aardman',\n 877: 'Insurance',\n 878: 'Pearl S Buck',\n 879: 'Hepburn and Tracy',\n 880: 'interracial marriage',\n 881: 'prejudice',\n 882: 'factory',\n 883: 'pool',\n 884: 'evolution',\n 885: 'Nick Hornby',\n 886: 'Peter Pan',\n 887: 'stephen king',\n 888: 'families',\n 889: 'Jesse Ventura',\n 890: 'macho',\n 891: 'scifi cult',\n 892: 'psychiatrist',\n 893: 'rabbi',\n 894: 'Gambling',\n 895: 'sofia coppola',\n 896: 'suicide',\n 897: 'business',\n 898: 'Epic',\n 899: 'Romans',\n 900: 'Russell Crowe',\n 901: 'ancient Rome',\n 902: 'history',\n 903: 'Pee Wee Herman',\n 904: 'mining',\n 905: 'easygoing',\n 906: 'silly',\n 907: 'Insane',\n 908: 'art house',\n 909: 'cryptic',\n 910: 'fucked up',\n 911: 'gruesome',\n 912: 'strange',\n 913: 'weird',\n 914: 'hugh jackman',\n 915: 'marvel',\n 916: 'missing children',\n 917: 'diabetes',\n 918: 'televangelist',\n 919: 'Rolling Stone',\n 920: 'blindness',\n 921: 'Dogs',\n 922: 'invisibility',\n 923: 'depressing',\n 924: 'ballet',\n 925: 'Dr. Seuss',\n 926: 'comics',\n 927: 'father-son relationship',\n 928: 'somber',\n 929: 'china',\n 930: 'stand-up comedy',\n 931: 'pageant',\n 932: 'bluegrass',\n 933: 'Quakers',\n 934: 'psychopaths',\n 935: 'Beautiful',\n 936: 'Unique',\n 937: 'dreamy',\n 938: 'elegant',\n 939: 'heartbreaking',\n 940: 'intimate',\n 941: 'long takes',\n 942: 'longing',\n 943: 'melancholic',\n 944: 'moody',\n 945: 'nocturnal',\n 946: 'romantic',\n 947: 'reality TV',\n 948: \"Eugene O'Neill\",\n 949: 'preacher',\n 950: 'virginity',\n 951: 'Hannibal Lecter',\n 952: 'von Bulow',\n 953: 'Ed Harris',\n 954: 'Rachel Weisz',\n 955: 'Backwards. 
memory',\n 956: 'Mindfuck',\n 957: 'singletons',\n 958: 'Nabokov',\n 959: 'Union',\n 960: 'fairy tales',\n 961: 'Strangers on a Train',\n 962: 'really bad',\n 963: 'Bittersweet',\n 964: 'android(s)/cyborg(s)',\n 965: 'accident',\n 966: 'boring',\n 967: 'claymation',\n 968: 'creativity',\n 969: 'dystopia',\n 970: 'free to download',\n 971: 'imagination',\n 972: 'no dialogue',\n 973: 'immigrants',\n 974: 'hippies',\n 975: '\"artsy\"',\n 976: 'reciprocal spectator',\n 977: 'Well Done',\n 978: 'mental hospital',\n 979: 'prodigies',\n 980: 'Nuclear disaster',\n 981: 'union',\n 982: 'David Bowie',\n 983: 'Will Ferrell',\n 984: 'ben stiller',\n 985: 'mindless one liners',\n 986: 'Loretta Lynn',\n 987: 'Tradition!',\n 988: 'jake gyllenhaal',\n 989: 'Everything you want is here',\n 990: 'Magic',\n 991: 'Wizards',\n 992: 'alan rickman',\n 993: 'harry potter',\n 994: 'singers',\n 995: 'whimsical',\n 996: 'high fantasy',\n 997: 'mythology',\n 998: 'tolkien',\n 999: 'wizards',\n ...}"
     },
     "execution_count": 52,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Build a plain {token_id: token} dict from dct so the mapping can be inspected.\n",
    "{token_id: dct[token_id] for token_id in dct.keys()}"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 54,
   "outputs": [],
   "source": [
    "# Apply dct.doc2bow to every entry of dataset and collect the results in a list.\n",
    "corpus = list(map(dct.doc2bow, dataset))"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 55,
   "outputs": [
    {
     "data": {
      "text/plain": "[[(0, 1), (1, 1), (2, 1), (3, 1), (4, 1), (5, 1), (6, 2)],\n [(0, 1), (2, 1), (4, 1), (7, 1), (8, 1), (9, 1), (10, 1)],\n [(3, 1), (11, 1), (12, 1), (13, 1)],\n [],\n [(3, 1), (14, 1), (15, 1)],\n [],\n [(3, 1), (11, 1), (15, 1)],\n [],\n [],\n [],\n [(3, 1), (11, 1), (16, 1), (17, 1), (18, 1)],\n [],\n [],\n [(16, 1), (17, 1), (18, 1)],\n [],\n [(16, 1), (19, 1), (20, 1)],\n [(11, 1), (16, 1), (21, 1)],\n [],\n [],\n [],\n [(3, 1), (19, 1), (22, 1), (23, 1)],\n [(16, 1), (19, 1), (23, 1), (24, 1), (25, 1), (26, 1)],\n [],\n [],\n [(11, 1), (16, 1), (27, 1)],\n [(16, 1), (28, 1)],\n [],\n [(11, 1), (16, 1), (21, 1), (29, 1)],\n [(0, 1), (4, 1), (16, 1), (25, 1), (30, 1), (31, 1)],\n [],\n [(16, 1), (32, 1), (33, 1)],\n [(15, 1),\n  (23, 1),\n  (25, 1),\n  (30, 1),\n  (34, 1),\n  (35, 1),\n  (36, 1),\n  (37, 1),\n  (38, 1),\n  (39, 3),\n  (40, 1)],\n [(2, 1), (16, 1), (41, 1), (42, 1), (43, 1)],\n [(16, 1), (19, 1), (44, 1), (45, 1)],\n [(2, 1), (3, 1), (46, 1)],\n [(3, 1),\n  (11, 1),\n  (21, 1),\n  (47, 1),\n  (48, 1),\n  (49, 1),\n  (50, 1),\n  (51, 1),\n  (52, 1)],\n [(16, 1), (29, 1), (53, 1)],\n [(16, 1), (28, 1), (54, 1)],\n [],\n [(16, 1), (55, 1)],\n [],\n [(3, 1), (16, 1), (23, 1), (56, 1)],\n [(11, 1), (16, 1), (57, 1)],\n [(23, 1), (25, 1), (26, 1), (40, 1), (58, 1)],\n [],\n [],\n [(19, 1),\n  (23, 1),\n  (25, 1),\n  (37, 1),\n  (40, 1),\n  (59, 1),\n  (60, 1),\n  (61, 1),\n  (62, 1)],\n [(3, 1), (11, 1), (16, 1), (63, 1), (64, 1)],\n [],\n [],\n [],\n [],\n [(3, 1), (11, 1), (16, 1), (65, 1)],\n [],\n [],\n [(16, 1), (66, 1)],\n [],\n [],\n [],\n [],\n [],\n [],\n [],\n [],\n [],\n [],\n [],\n [],\n [],\n [],\n [],\n [],\n [],\n [],\n [],\n [],\n [],\n [],\n [],\n [],\n [],\n [(16, 1), (23, 1), (24, 1), (67, 1)],\n [],\n [],\n [],\n [(3, 1), (16, 1), (68, 1)],\n [],\n [],\n [],\n [(0, 1), (3, 1), (11, 1), (19, 1), (69, 1), (70, 1), (71, 1)],\n [],\n [],\n [(3, 1), (72, 1), (73, 1), (74, 1)],\n [],\n [],\n [(0, 1), (2, 1), (3, 1), 
(75, 1), (76, 1)],\n [],\n [(16, 1),\n  (54, 1),\n  (77, 1),\n  (78, 1),\n  (79, 1),\n  (80, 1),\n  (81, 1),\n  (82, 1),\n  (83, 1),\n  (84, 1),\n  (85, 1),\n  (86, 1),\n  (87, 1)],\n [(16, 1), (19, 1), (23, 1), (88, 1)],\n [],\n [],\n [(89, 1), (90, 1)],\n [],\n [],\n [],\n [],\n [(3, 1), (11, 1), (91, 1)],\n [],\n [],\n [],\n [],\n [],\n [],\n [],\n [],\n [(11, 1), (16, 1), (92, 1)],\n [],\n [],\n [],\n [],\n [(16, 1), (93, 1), (94, 1), (95, 1), (96, 1)],\n [],\n [],\n [(0, 1), (16, 1), (97, 1), (98, 1), (99, 1), (100, 1)],\n [],\n [],\n [(0, 1), (3, 1), (19, 1), (77, 1), (101, 2)],\n [],\n [],\n [],\n [],\n [],\n [],\n [(0, 1), (25, 1), (30, 1), (77, 1), (102, 1)],\n [(16, 1), (23, 1), (54, 1), (103, 1)],\n [(29, 1), (89, 1)],\n [],\n [],\n [],\n [],\n [],\n [],\n [],\n [],\n [],\n [],\n [],\n [(16, 1), (104, 1), (105, 1), (106, 1)],\n [],\n [],\n [],\n [],\n [],\n [],\n [],\n [],\n [(19, 1), (23, 1), (77, 1), (107, 1)],\n [],\n [],\n [],\n [],\n [],\n [],\n [],\n [],\n [],\n [],\n [],\n [(11, 1), (16, 1), (75, 1), (108, 1)],\n [],\n [],\n [],\n [],\n [],\n [],\n [],\n [],\n [],\n [],\n [],\n [],\n [],\n [],\n [(11, 1), (16, 1), (109, 1)],\n [(3, 1), (72, 1), (110, 1), (111, 1)],\n [],\n [],\n [],\n [],\n [(11, 1), (16, 1), (112, 1)],\n [(3, 1), (71, 1), (109, 1), (113, 1), (114, 1), (115, 1), (116, 1)],\n [(3, 1), (11, 1), (16, 1), (117, 1), (118, 1)],\n [],\n [],\n [],\n [],\n [(16, 1), (23, 1), (119, 1)],\n [],\n [(3, 1), (11, 1), (16, 1), (29, 1)],\n [],\n [],\n [(3, 1), (16, 1), (120, 1)],\n [],\n [(3, 1), (11, 1), (121, 1), (122, 1), (123, 1)],\n [],\n [],\n [],\n [],\n [],\n [],\n [(89, 1), (122, 1)],\n [(16, 1), (19, 1), (124, 1)],\n [(3, 1), (125, 1), (126, 1)],\n [(11, 1), (16, 1), (127, 1)],\n [],\n [],\n [(3, 1), (11, 1), (128, 1)],\n [],\n [],\n [],\n [],\n [(23, 1), (25, 1), (129, 1)],\n [],\n [],\n [(0, 1),\n  (30, 1),\n  (77, 1),\n  (100, 1),\n  (130, 2),\n  (131, 1),\n  (132, 1),\n  (133, 2),\n  (134, 1),\n  (135, 2),\n  (136, 3),\n  (137, 1),\n 
 (138, 1),\n  (139, 1),\n  (140, 1),\n  (141, 1),\n  (142, 3),\n  (143, 2),\n  (144, 1),\n  (145, 1),\n  (146, 1)],\n [(16, 1), (147, 1)],\n [(2, 1), (16, 1), (55, 1), (148, 1), (149, 1)],\n [],\n [],\n [],\n [],\n [],\n [],\n [],\n [(3, 1), (16, 1), (55, 1), (117, 1)],\n [(16, 1), (24, 1), (30, 1), (150, 1)],\n [],\n [],\n [],\n [(16, 1), (151, 1)],\n [],\n [(16, 1), (29, 1)],\n [(16, 1), (23, 1), (152, 1)],\n [],\n [(16, 1), (46, 1), (153, 1)],\n [],\n [],\n [],\n [],\n [(19, 1),\n  (23, 1),\n  (77, 1),\n  (106, 1),\n  (154, 1),\n  (155, 1),\n  (156, 1),\n  (157, 1),\n  (158, 1)],\n [],\n [(16, 1), (19, 1), (29, 1)],\n [],\n [],\n [(16, 1),\n  (19, 1),\n  (23, 1),\n  (69, 1),\n  (71, 1),\n  (77, 2),\n  (88, 1),\n  (159, 1),\n  (160, 1),\n  (161, 1),\n  (162, 2),\n  (163, 1),\n  (164, 1),\n  (165, 1),\n  (166, 2),\n  (167, 1),\n  (168, 1),\n  (169, 1),\n  (170, 1),\n  (171, 1),\n  (172, 1),\n  (173, 1),\n  (174, 2),\n  (175, 1),\n  (176, 1),\n  (177, 1),\n  (178, 1),\n  (179, 1),\n  (180, 1),\n  (181, 1),\n  (182, 1),\n  (183, 1),\n  (184, 1),\n  (185, 1),\n  (186, 1)],\n [],\n [],\n [(3, 1),\n  (5, 1),\n  (16, 1),\n  (19, 1),\n  (20, 1),\n  (23, 1),\n  (40, 1),\n  (50, 1),\n  (60, 1),\n  (61, 1),\n  (69, 1),\n  (71, 1),\n  (96, 1),\n  (116, 1),\n  (134, 1),\n  (135, 1),\n  (139, 2),\n  (154, 1),\n  (155, 1),\n  (157, 1),\n  (158, 1),\n  (166, 1),\n  (170, 1),\n  (171, 1),\n  (173, 1),\n  (174, 2),\n  (175, 1),\n  (177, 1),\n  (180, 1),\n  (186, 1),\n  (187, 1),\n  (188, 1),\n  (189, 1),\n  (190, 1),\n  (191, 1),\n  (192, 1),\n  (193, 1),\n  (194, 2),\n  (195, 1),\n  (196, 1),\n  (197, 1),\n  (198, 1),\n  (199, 2),\n  (200, 1),\n  (201, 1),\n  (202, 1),\n  (203, 1),\n  (204, 1),\n  (205, 1),\n  (206, 1),\n  (207, 1),\n  (208, 1),\n  (209, 1),\n  (210, 1),\n  (211, 1),\n  (212, 1),\n  (213, 1),\n  (214, 1),\n  (215, 1),\n  (216, 1),\n  (217, 1),\n  (218, 1),\n  (219, 1),\n  (220, 1),\n  (221, 1),\n  (222, 1),\n  (223, 1),\n  (224, 1),\n  (225, 1),\n  (226, 1),\n  
(227, 1),\n  (228, 1),\n  (229, 1),\n  (230, 1),\n  (231, 1),\n  (232, 1),\n  (233, 2),\n  (234, 1),\n  (235, 1),\n  (236, 1),\n  (237, 1),\n  (238, 1),\n  (239, 1),\n  (240, 1),\n  (241, 1),\n  (242, 1),\n  (243, 2),\n  (244, 1),\n  (245, 1),\n  (246, 1),\n  (247, 1),\n  (248, 1),\n  (249, 1),\n  (250, 1),\n  (251, 1),\n  (252, 1),\n  (253, 1),\n  (254, 1),\n  (255, 1),\n  (256, 1),\n  (257, 1),\n  (258, 1),\n  (259, 2),\n  (260, 1),\n  (261, 1),\n  (262, 1),\n  (263, 1),\n  (264, 1),\n  (265, 1),\n  (266, 1),\n  (267, 1),\n  (268, 1),\n  (269, 1),\n  (270, 1),\n  (271, 1),\n  (272, 1),\n  (273, 1),\n  (274, 1),\n  (275, 1),\n  (276, 1),\n  (277, 1),\n  (278, 1),\n  (279, 1),\n  (280, 1),\n  (281, 1),\n  (282, 1),\n  (283, 1),\n  (284, 1),\n  (285, 1),\n  (286, 1),\n  (287, 1),\n  (288, 1),\n  (289, 1),\n  (290, 2),\n  (291, 1),\n  (292, 1),\n  (293, 1),\n  (294, 1),\n  (295, 1),\n  (296, 1),\n  (297, 1),\n  (298, 1),\n  (299, 1),\n  (300, 1),\n  (301, 1),\n  (302, 1),\n  (303, 1),\n  (304, 1),\n  (305, 1),\n  (306, 1),\n  (307, 1),\n  (308, 1),\n  (309, 1),\n  (310, 1),\n  (311, 1),\n  (312, 1),\n  (313, 1),\n  (314, 1),\n  (315, 1),\n  (316, 1),\n  (317, 1),\n  (318, 1),\n  (319, 1),\n  (320, 1),\n  (321, 1),\n  (322, 1),\n  (323, 1),\n  (324, 1),\n  (325, 1),\n  (326, 1),\n  (327, 1),\n  (328, 1),\n  (329, 1),\n  (330, 1),\n  (331, 1),\n  (332, 1),\n  (333, 1)],\n [],\n [],\n [(16, 1), (334, 1)],\n [],\n [],\n [],\n [],\n [],\n [],\n [(16, 1), (335, 1)],\n [(3, 1), (16, 1), (113, 1), (336, 1)],\n [],\n [],\n [],\n [],\n [],\n [],\n [(0, 1), (30, 1), (39, 1), (77, 1)],\n [(3, 1), (4, 1), (16, 1), (151, 1)],\n [(16, 1), (19, 1), (119, 1), (152, 1), (337, 1), (338, 1)],\n [],\n [],\n [],\n [],\n [],\n [],\n [(16, 1), (29, 1)],\n [],\n [],\n [(0, 1), (16, 1), (30, 1), (339, 1)],\n [],\n [],\n [],\n [],\n [],\n [],\n [],\n [(16, 1), (117, 1)],\n [(23, 1), (26, 1), (30, 1), (77, 1)],\n [(3, 1), (11, 1), (340, 1)],\n [],\n [],\n [(3, 1), (124, 1), (341, 1)],\n [],\n 
[],\n [(3, 1), (16, 1), (342, 1), (343, 1), (344, 1)],\n [],\n [],\n [],\n [(16, 1), (19, 1), (23, 1), (77, 1), (345, 1)],\n [(16, 1), (23, 1), (25, 1), (346, 1)],\n [(3, 1), (11, 1), (16, 1), (347, 1)],\n [],\n [],\n [],\n [],\n [(3, 1),\n  (11, 1),\n  (16, 1),\n  (54, 1),\n  (185, 1),\n  (348, 1),\n  (349, 1),\n  (350, 1),\n  (351, 1),\n  (352, 1),\n  (353, 1),\n  (354, 1),\n  (355, 1)],\n [(3, 1), (11, 1), (57, 1)],\n [],\n [],\n [],\n [(3, 1), (11, 1), (16, 1), (356, 1)],\n [],\n [(29, 1), (89, 1), (90, 1)],\n [(0, 1),\n  (1, 1),\n  (2, 1),\n  (16, 1),\n  (75, 1),\n  (97, 1),\n  (323, 1),\n  (357, 2),\n  (358, 1),\n  (359, 1)],\n [],\n [],\n [],\n [],\n [],\n [],\n [(3, 1), (16, 1), (92, 1)],\n [],\n [],\n [],\n [],\n [(11, 1), (23, 1), (77, 1), (360, 1)],\n [],\n [],\n [(0, 1), (3, 1), (11, 1), (23, 1), (77, 1), (361, 1)],\n [(11, 1), (16, 1), (27, 1)],\n [],\n [],\n [],\n [],\n [],\n [],\n [],\n [],\n [],\n [],\n [],\n [],\n [],\n [],\n [],\n [],\n [(2, 1),\n  (3, 1),\n  (4, 1),\n  (150, 1),\n  (155, 1),\n  (362, 1),\n  (363, 1),\n  (364, 1),\n  (365, 1)],\n [(16, 1), (366, 1)],\n [(3, 2)],\n [],\n [],\n [],\n [],\n [],\n [],\n [],\n [(0, 1), (2, 1), (16, 1), (367, 1)],\n [],\n [],\n [],\n [(11, 1), (16, 1), (117, 1), (368, 1)],\n [],\n [],\n [],\n [],\n [],\n [(16, 1), (19, 1), (255, 1), (369, 1), (370, 1)],\n [],\n [],\n [],\n [],\n [],\n [],\n [],\n [(3, 1), (11, 1), (371, 1)],\n [],\n [],\n [],\n [],\n [],\n [],\n [],\n [],\n [],\n [],\n [],\n [(16, 1), (23, 1), (346, 1)],\n [],\n [],\n [(23, 1), (372, 1)],\n [],\n [],\n [],\n [],\n [],\n [],\n [],\n [],\n [],\n [],\n [(3, 1), (373, 1)],\n [],\n [],\n [(23, 1), (77, 1), (88, 1)],\n [(16, 1), (112, 1)],\n [],\n [(16, 1), (75, 1), (374, 1)],\n [],\n [],\n [(0, 1), (23, 1), (30, 1), (77, 1), (375, 1)],\n [],\n [],\n [],\n [],\n [],\n [],\n [(11, 1), (16, 1), (376, 1), (377, 1)],\n [],\n [],\n [],\n [],\n [],\n [],\n [],\n [],\n [(3, 1), (11, 1), (28, 1)],\n [],\n [(3, 1), (16, 1), (342, 1), (343, 1), (378, 
1)],\n [],\n [],\n [],\n [],\n [],\n [],\n [(16, 1), (379, 1)],\n [],\n [],\n [],\n [],\n [(3, 1), (11, 1), (25, 1), (380, 1), (381, 1)],\n [],\n [(11, 1), (16, 1), (382, 1), (383, 1)],\n [(3, 1), (16, 1), (384, 1)],\n [],\n [],\n [],\n [],\n [],\n [(16, 1), (77, 1), (124, 1), (332, 1), (385, 1)],\n [],\n [(16, 1), (386, 1)],\n [],\n [(16, 1),\n  (54, 1),\n  (90, 1),\n  (170, 1),\n  (327, 1),\n  (387, 1),\n  (388, 1),\n  (389, 1),\n  (390, 1)],\n [],\n [(16, 1), (391, 1)],\n [(2, 1), (16, 1), (29, 1)],\n [],\n [],\n [(11, 1), (16, 1), (392, 1)],\n [(16, 1), (393, 1)],\n [],\n [],\n [(16, 1), (394, 1)],\n [(3, 1), (11, 1), (16, 1), (395, 1)],\n [(23, 1), (126, 1)],\n [(23, 1),\n  (30, 1),\n  (37, 1),\n  (77, 1),\n  (142, 1),\n  (207, 2),\n  (303, 1),\n  (396, 1),\n  (397, 1),\n  (398, 1),\n  (399, 1),\n  (400, 1),\n  (401, 1),\n  (402, 1),\n  (403, 1)],\n [],\n [(3, 1), (11, 1), (23, 1), (404, 1)],\n [],\n [],\n [],\n [],\n [],\n [],\n [(1, 1), (2, 1), (4, 1), (75, 1), (151, 1), (405, 1)],\n [(0, 1), (3, 1), (11, 1), (77, 1), (406, 1)],\n [],\n [],\n [(17, 1), (89, 1)],\n [],\n [(3, 1), (16, 1), (407, 1)],\n [],\n [],\n [],\n [],\n [],\n [],\n [],\n [],\n [],\n [],\n [],\n [],\n [],\n [],\n [(2, 1), (3, 1), (408, 1)],\n [(3, 1), (4, 1), (11, 1), (16, 1), (23, 1), (409, 1)],\n [(0, 1), (1, 1), (2, 1), (3, 1), (75, 1), (357, 1)],\n [(30, 1),\n  (39, 1),\n  (77, 1),\n  (142, 1),\n  (403, 1),\n  (410, 1),\n  (411, 1),\n  (412, 1),\n  (413, 1),\n  (414, 1)],\n [(0, 1), (16, 1), (415, 1), (416, 1), (417, 1)],\n [(19, 1), (23, 1), (77, 1), (101, 1)],\n [(19, 1),\n  (23, 1),\n  (24, 1),\n  (60, 1),\n  (118, 1),\n  (150, 1),\n  (170, 1),\n  (171, 1),\n  (418, 1)],\n [(1, 1), (2, 1), (4, 1), (16, 1), (75, 1), (357, 1)],\n [(1, 1), (2, 1), (4, 1), (11, 1), (75, 1), (97, 1), (357, 1)],\n [(1, 1), (2, 1), (4, 1), (75, 1), (357, 1)],\n [(3, 1), (11, 1), (64, 1)],\n [],\n [],\n [],\n [],\n [],\n [(3, 1),\n  (16, 1),\n  (19, 1),\n  (23, 1),\n  (155, 1),\n  (198, 1),\n  (387, 1),\n  
(419, 1),\n  (420, 1)],\n [],\n [],\n [],\n [],\n [],\n [],\n [(1, 1), (2, 1), (357, 1)],\n [],\n [],\n [],\n [],\n [],\n [(16, 1),\n  (19, 1),\n  (23, 1),\n  (25, 1),\n  (40, 1),\n  (60, 1),\n  (118, 1),\n  (327, 1),\n  (421, 1),\n  (422, 1)],\n [],\n [],\n [],\n [],\n [],\n [],\n [],\n [(11, 1), (423, 1)],\n [],\n [],\n [],\n [(16, 1), (19, 1), (54, 1), (77, 1), (424, 1)],\n [(0, 1), (23, 1), (25, 1), (77, 1), (372, 1)],\n [],\n [],\n [],\n [],\n [],\n [],\n [],\n [],\n [],\n [(16, 1), (149, 1)],\n [(16, 1), (149, 1)],\n [(3, 1), (30, 1), (425, 1)],\n [(0, 1), (1, 1), (2, 1), (3, 1), (4, 1), (30, 1), (426, 1)],\n [],\n [],\n [],\n [],\n [],\n [],\n [],\n [],\n [],\n [],\n [],\n [],\n [],\n [],\n [],\n [],\n [(16, 1), (19, 1), (23, 1), (126, 1)],\n [(3, 1), (11, 1), (427, 1)],\n [],\n [],\n [],\n [],\n [],\n [],\n [],\n [(0, 1), (1, 1), (3, 1), (428, 1)],\n [],\n [],\n [],\n [],\n [(3, 1), (29, 1)],\n [],\n [(0, 1), (23, 1), (77, 1), (429, 1), (430, 1), (431, 1)],\n [],\n [(0, 1), (11, 1), (23, 1), (77, 1), (432, 1)],\n [],\n [],\n [],\n [],\n [(1, 1), (2, 1), (3, 1), (428, 1)],\n [],\n [(23, 1), (30, 1), (77, 1), (433, 1)],\n [(3, 1),\n  (54, 1),\n  (155, 3),\n  (157, 1),\n  (364, 2),\n  (434, 1),\n  (435, 1),\n  (436, 1),\n  (437, 1)],\n [],\n [],\n [],\n [],\n [],\n [],\n [],\n [],\n [],\n [],\n [(3, 1), (16, 1), (19, 1), (155, 1), (438, 1), (439, 1), (440, 1)],\n [],\n [(0, 1), (23, 1), (30, 1), (77, 1), (433, 1)],\n [],\n [],\n [],\n [],\n [],\n [],\n [],\n [],\n [],\n [],\n [],\n [],\n [(16, 1), (25, 1), (29, 1), (417, 1)],\n [],\n [],\n [],\n [],\n [(16, 1), (23, 1), (346, 1)],\n [],\n [],\n [],\n [],\n [],\n [(3, 1), (372, 1)],\n [],\n [],\n [],\n [],\n [(3, 1), (441, 1)],\n [(19, 1), (23, 1), (31, 1), (442, 1), (443, 1), (444, 1)],\n [],\n [],\n [(0, 1), (23, 1), (77, 1), (445, 1)],\n [],\n [(3, 1), (11, 1), (16, 1), (21, 1)],\n [],\n [],\n [],\n [],\n [],\n [],\n [],\n [(3, 1), (11, 1), (16, 1), (73, 1)],\n [(16, 1), (19, 1), (20, 1)],\n [],\n [],\n 
[],\n [],\n [],\n [],\n [],\n [],\n [],\n [],\n [],\n [],\n [],\n [],\n [],\n [],\n [(3, 1), (11, 1), (16, 1), (28, 1)],\n [],\n [],\n [],\n [(3, 1), (11, 1), (16, 1), (378, 1)],\n [(3, 1), (11, 1), (75, 1), (120, 1)],\n [(11, 1), (75, 1), (121, 1)],\n [],\n [(11, 1), (16, 1), (446, 1)],\n [(11, 1),\n  (16, 1),\n  (23, 1),\n  (25, 1),\n  (177, 1),\n  (447, 1),\n  (448, 1),\n  (449, 1),\n  (450, 1)],\n [(23, 1), (25, 1), (58, 1), (177, 1), (449, 1), (451, 1), (452, 1), (453, 1)],\n [(3, 1), (11, 1), (454, 1)],\n [(16, 1), (23, 1), (455, 1)],\n [(3, 1), (11, 1), (75, 1), (378, 1)],\n [(0, 1), (11, 1), (23, 1), (25, 1), (77, 1), (177, 1), (447, 1), (456, 1)],\n [(3, 1), (11, 1), (16, 1), (441, 1)],\n [(3, 1), (19, 1), (343, 1)],\n [(3, 1), (11, 1), (19, 1), (23, 1), (25, 1), (59, 1)],\n [(11, 1), (16, 1), (457, 1)],\n [(25, 1), (458, 1), (459, 1)],\n [(3, 1), (11, 1), (16, 1), (75, 1), (460, 1)],\n [(3, 1), (11, 1), (461, 1)],\n [(3, 1), (11, 1), (16, 1), (462, 1), (463, 1)],\n [],\n [(75, 1), (464, 1)],\n [(0, 1), (2, 1), (4, 1), (75, 1), (465, 1), (466, 1)],\n [(11, 1), (16, 1), (54, 1), (467, 1)],\n [(3, 1), (468, 1)],\n [(11, 1), (16, 1), (120, 1), (458, 1), (469, 1), (470, 1)],\n [(16, 1), (25, 1), (471, 1)],\n [(0, 1),\n  (16, 1),\n  (30, 1),\n  (58, 1),\n  (66, 1),\n  (100, 2),\n  (135, 1),\n  (142, 1),\n  (177, 1),\n  (207, 1),\n  (233, 1),\n  (279, 1),\n  (303, 1),\n  (323, 1),\n  (398, 1),\n  (402, 1),\n  (403, 1),\n  (409, 1),\n  (433, 1),\n  (472, 1),\n  (473, 1),\n  (474, 1),\n  (475, 1),\n  (476, 1),\n  (477, 1),\n  (478, 1),\n  (479, 1),\n  (480, 1),\n  (481, 1),\n  (482, 1),\n  (483, 1),\n  (484, 1),\n  (485, 1),\n  (486, 1),\n  (487, 1),\n  (488, 1),\n  (489, 1),\n  (490, 1),\n  (491, 1),\n  (492, 1),\n  (493, 1),\n  (494, 1),\n  (495, 1)],\n [(16, 1), (22, 1)],\n [(3, 1), (378, 1)],\n [(11, 1), (16, 1), (23, 1), (25, 1), (496, 1)],\n [(16, 1), (23, 1), (25, 1), (92, 1), (458, 1), (497, 1), (498, 1)],\n [(11, 1), (23, 1), (88, 1), (458, 1)],\n [(11, 
1), (23, 1), (25, 1), (499, 1)],\n [],\n [],\n [(3, 1), (57, 1)],\n [],\n [(3, 1), (11, 1), (500, 1), (501, 1)],\n [],\n [(64, 1), (75, 1)],\n [(0, 1), (11, 1), (77, 1), (502, 1)],\n [(0, 1), (502, 1)],\n [],\n [(4, 1), (11, 1), (16, 1), (503, 1)],\n [(16, 1), (504, 1)],\n [(3, 1), (11, 1), (75, 1), (505, 1)],\n [],\n [(3, 1), (11, 1), (506, 1), (507, 1), (508, 1)],\n [(11, 1), (16, 1), (417, 1), (509, 1)],\n [],\n [(3, 1), (19, 1), (510, 1)],\n [(3, 1), (11, 1), (454, 1)],\n [(0, 1), (3, 1), (394, 1)],\n [(2, 1), (4, 1), (11, 1), (16, 1), (151, 1)],\n [(16, 1), (511, 1)],\n [(3, 1), (11, 1), (508, 1), (512, 1)],\n [(11, 1), (16, 1), (63, 1)],\n [],\n [],\n [],\n [(16, 1), (23, 1), (25, 1), (513, 1)],\n [(23, 1), (24, 1), (30, 1), (514, 1)],\n [(0, 1), (3, 1), (11, 1), (54, 1), (515, 1)],\n [(0, 1), (3, 1), (11, 1), (16, 1), (19, 1), (69, 1)],\n [(16, 1), (516, 1)],\n [(3, 1), (16, 1), (92, 1)],\n [(11, 1), (54, 1), (517, 1)],\n [],\n [],\n [],\n [],\n [(0, 1), (2, 1), (41, 1)],\n [],\n [],\n [],\n [(16, 1), (112, 1)],\n [],\n [(3, 1), (16, 1), (518, 1)],\n [],\n [],\n [],\n [],\n [],\n [],\n [(16, 1), (45, 1), (346, 1)],\n [],\n [],\n [],\n [(2, 1), (3, 1), (357, 1), (394, 1)],\n [],\n [],\n [(2, 1), (3, 1), (11, 1), (46, 1)],\n [],\n [],\n [],\n [],\n [],\n [],\n [],\n [],\n [(1, 1), (2, 1), (4, 1), (11, 1), (75, 1), (357, 1)],\n [],\n [],\n [(1, 1), (2, 1), (4, 1), (75, 1), (357, 1), (519, 1)],\n [],\n [(2, 1), (3, 1), (4, 1), (75, 1), (357, 1), (520, 1)],\n [(1, 1), (2, 1), (16, 1), (75, 1), (357, 1)],\n [(0, 1), (1, 1), (2, 1), (75, 1), (357, 1)],\n [],\n [(0, 1), (1, 1), (2, 1), (4, 1), (75, 1), (357, 1)],\n [(1, 1), (2, 1), (16, 1), (357, 1)],\n [],\n [(11, 1), (75, 1), (521, 1)],\n [],\n [],\n [],\n [(16, 1), (29, 1)],\n [(3, 1), (16, 1), (522, 1)],\n [],\n [],\n [],\n [],\n [],\n [],\n [],\n [],\n [],\n [],\n [],\n [(11, 1), (16, 1), (28, 1), (93, 1), (523, 1), (524, 1), (525, 1)],\n [],\n [],\n [],\n [(3, 1), (11, 1), (75, 1), (505, 1)],\n [(19, 1), (458, 
1), (526, 1)],\n [],\n [(16, 1), (23, 1), (24, 1), (527, 1)],\n [],\n [],\n [(3, 1), (19, 1), (528, 1)],\n [(3, 1), (302, 1), (529, 1), (530, 1)],\n [(3, 1), (11, 1), (75, 1), (342, 1)],\n [(16, 1), (17, 1)],\n [],\n [(16, 1), (19, 1), (256, 1), (531, 1)],\n [],\n [],\n [(11, 1), (16, 1), (66, 1), (75, 1), (234, 1)],\n [(19, 1),\n  (23, 1),\n  (25, 1),\n  (59, 1),\n  (158, 1),\n  (176, 1),\n  (194, 2),\n  (199, 1),\n  (245, 1),\n  (288, 1),\n  (292, 1),\n  (332, 1),\n  (530, 1)],\n [(16, 1), (54, 1), (348, 1)],\n [],\n [],\n [(16, 1), (66, 1), (532, 1), (533, 1)],\n [],\n [],\n [(16, 1), (90, 1)],\n [(2, 1), (16, 1), (30, 1), (433, 1)],\n [],\n [],\n [(11, 1), (77, 1), (534, 1), (535, 1)],\n [(16, 1), (407, 1), (536, 1)],\n [(16, 1), (516, 1)],\n [],\n [],\n [],\n [],\n [],\n [],\n [],\n [],\n [(3, 1), (16, 1), (537, 1)],\n [],\n [],\n [(16, 1), (538, 1)],\n [(3, 1), (19, 1), (539, 1)],\n [],\n [],\n [],\n [],\n [],\n [],\n [],\n [(3, 1), (384, 1)],\n [(0, 1), (3, 1), (4, 1), (55, 1), (519, 1), (540, 1), (541, 1)],\n [],\n [],\n [],\n [(29, 1), (89, 1), (542, 1)],\n [(1, 1), (2, 1), (3, 1), (19, 1), (428, 1)],\n [],\n [],\n [],\n [],\n [],\n [],\n [],\n [],\n [(3, 1), (17, 1)],\n [],\n [],\n [],\n [],\n [(11, 1), (16, 1), (462, 1)],\n [(16, 1), (54, 1), (129, 1), (384, 1)],\n [(16, 1), (19, 1), (69, 1), (458, 1)],\n [],\n [(11, 1), (16, 1), (54, 1), (441, 1)],\n [],\n [(16, 1), (29, 1)],\n [],\n [(16, 1), (153, 1)],\n [(3, 1), (11, 1), (124, 1), (234, 1)],\n [(29, 1), (89, 1), (181, 1)],\n [],\n [],\n [],\n [(16, 1), (117, 1), (351, 1), (543, 1)],\n [],\n [(0, 1),\n  (30, 1),\n  (66, 1),\n  (77, 1),\n  (100, 1),\n  (135, 1),\n  (142, 1),\n  (146, 1),\n  (300, 1),\n  (544, 1),\n  (545, 1),\n  (546, 1),\n  (547, 1)],\n [(0, 1), (3, 1), (4, 1), (11, 1), (77, 1), (548, 1), (549, 1)],\n [(0, 1), (77, 1), (550, 1), (551, 1), (552, 2), (553, 1), (554, 2), (555, 1)],\n [],\n [(0, 1),\n  (24, 1),\n  (30, 1),\n  (60, 1),\n  (77, 1),\n  (100, 2),\n  (134, 1),\n  (142, 1),\n  
(433, 1),\n  (556, 1),\n  (557, 1),\n  (558, 1)],\n [(0, 1), (77, 1), (417, 1), (559, 1)],\n [],\n [(16, 1),\n  (129, 1),\n  (259, 1),\n  (263, 1),\n  (327, 1),\n  (560, 1),\n  (561, 1),\n  (562, 1),\n  (563, 1),\n  (564, 1)],\n [(0, 1), (16, 1), (54, 1), (565, 1)],\n [(16, 1), (19, 1), (23, 1), (30, 1), (566, 1)],\n [(16, 1), (385, 1), (567, 1)],\n [(16, 1), (54, 1), (77, 1), (348, 1)],\n [(16, 1), (77, 1), (417, 1), (559, 1)],\n [(0, 1), (30, 1), (77, 1), (137, 1), (140, 1), (146, 1)],\n [],\n [(23, 1), (25, 1), (458, 1), (568, 1), (569, 1), (570, 1)],\n [(16, 1), (19, 1), (20, 1)],\n [(24, 1), (30, 1), (433, 1)],\n [],\n [],\n [(16, 1), (54, 1), (571, 1)],\n [],\n [(19, 1),\n  (24, 1),\n  (118, 1),\n  (177, 1),\n  (184, 1),\n  (344, 1),\n  (447, 2),\n  (572, 1),\n  (573, 1),\n  (574, 1)],\n [(3, 1), (75, 1), (77, 1), (575, 1)],\n [(16, 1), (19, 1), (20, 2), (369, 1)],\n [(16, 1), (54, 1), (348, 1), (576, 1)],\n [(0, 1), (1, 1), (2, 1), (3, 1), (30, 1), (99, 1)],\n [(11, 1), (16, 1), (28, 1), (54, 1), (77, 1)],\n [(16, 1), (577, 1), (578, 1)],\n [],\n [],\n [(16, 1), (542, 1)],\n [(3, 1), (11, 1), (579, 1)],\n [(16, 1), (98, 1), (100, 1)],\n [],\n [(16, 1), (54, 1), (77, 1), (103, 1)],\n [(3, 1), (19, 1), (580, 1)],\n [(3, 1), (11, 1), (16, 1), (581, 1)],\n [],\n [(16, 1),\n  (207, 1),\n  (303, 1),\n  (391, 1),\n  (401, 1),\n  (582, 1),\n  (583, 1),\n  (584, 1)],\n [],\n [(23, 1), (30, 2), (39, 1), (77, 2), (184, 1), (398, 1), (403, 2), (585, 1)],\n [],\n [(16, 1), (54, 1), (467, 1)],\n [(3, 1), (16, 1), (586, 1)],\n [(3, 1), (11, 1), (16, 1), (573, 1)],\n [(16, 1), (19, 1), (20, 1), (23, 1), (458, 1)],\n [(16, 1), (587, 1), (588, 1)],\n [(3, 1), (11, 1), (16, 1), (589, 1)],\n [],\n [(11, 1), (19, 1), (23, 1), (77, 1), (174, 1)],\n [(0, 1), (16, 1), (54, 1), (590, 1)],\n [],\n [(19, 1), (23, 1), (25, 1), (458, 1), (591, 1)],\n [(16, 1), (23, 1), (30, 1), (433, 1)],\n [(0, 1), (16, 1), (77, 1), (417, 1), (592, 1)],\n [],\n [],\n [(3, 1), (11, 1), (593, 1)],\n 
[(24, 2),\n  (60, 1),\n  (119, 1),\n  (170, 1),\n  (207, 1),\n  (279, 1),\n  (305, 1),\n  (476, 1),\n  (543, 1)],\n [(0, 1), (16, 1), (119, 1)],\n [(19, 1),\n  (23, 1),\n  (26, 1),\n  (117, 1),\n  (207, 1),\n  (458, 1),\n  (594, 1),\n  (595, 1),\n  (596, 1),\n  (597, 1)],\n [],\n [(0, 1), (16, 1), (54, 1), (77, 1), (590, 1)],\n [(16, 1), (54, 1), (348, 1)],\n [],\n [],\n [(16, 1), (86, 1), (417, 1)],\n [(19, 1), (23, 1), (54, 1), (88, 1), (566, 1)],\n [],\n [(3, 1), (23, 1), (25, 1), (598, 1)],\n [(0, 1), (3, 1), (30, 1), (39, 1)],\n [],\n [(16, 1), (54, 1), (599, 1)],\n [],\n [(0, 1), (1, 1), (30, 1), (77, 1), (600, 1), (601, 1), (602, 1)],\n [],\n [(16, 1), (152, 1)],\n [(3, 1), (11, 1), (16, 1), (29, 1)],\n [(3, 1), (4, 1), (425, 1)],\n [],\n [(16, 1), (29, 1)],\n [(3, 1), (16, 1), (54, 1), (603, 1)],\n [(1, 1), (2, 1), (4, 1), (75, 1), (357, 1)],\n [(16, 1), (417, 1), (604, 1)],\n [(19, 1), (25, 1), (458, 1), (605, 1)],\n [(3, 1), (32, 1)],\n [],\n [],\n [(3, 1), (66, 2), (606, 1), (607, 1)],\n [],\n [],\n [(0, 1), (77, 1), (552, 1), (608, 1)],\n [(3, 1), (16, 1), (468, 1)],\n [(16, 1), (149, 1)],\n [],\n [(11, 1), (16, 1), (609, 1)],\n [],\n [],\n [(16, 1), (54, 1), (348, 1), (610, 1)],\n [],\n [(16, 1), (30, 1), (100, 1), (586, 1)],\n ...]"
     },
     "execution_count": 55,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "corpus"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 56,
   "outputs": [],
   "source": [
    "# Train the TF-IDF model on the corpus so TF-IDF weights can be computed\n",
    "model = TfidfModel(corpus)"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 68,
   "outputs": [],
   "source": [
    "# Keep at most this many of the highest-weighted terms per movie.\n",
    "TOP_N_TAGS = 30\n",
    "\n",
    "# Build {movieId: {term: tf-idf weight}}.\n",
    "# NOTE(review): assumes corpus[i] is the bag-of-words for the i-th row of\n",
    "# movie_dataset - confirm against the cell that builds `corpus`.\n",
    "movie_profile = {}\n",
    "for i, mid in enumerate(movie_dataset.index):\n",
    "    # Re-weight this movie's (term_id, count) pairs into (term_id, tf-idf) pairs.\n",
    "    vector = model[corpus[i]]\n",
    "    # Highest weights first; the stable sort keeps ties in their original order.\n",
    "    movie_tags = sorted(vector, key=lambda pair: pair[1], reverse=True)[:TOP_N_TAGS]\n",
    "    # Translate term ids back to their strings through `dct`.\n",
    "    movie_profile[mid] = {dct[term_id]: weight for term_id, weight in movie_tags}"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 69,
   "outputs": [
    {
     "data": {
      "text/plain": "{1: {'pixar': 0.837374709121301,\n  'fun': 0.34531665530514855,\n  'Animation': 0.21562355612017706,\n  'Children': 0.21205621229134275,\n  'Fantasy': 0.2004362408431816,\n  'Adventure': 0.17342978500637887,\n  'Comedy': 0.1335891911679789},\n 2: {'game': 0.49506005899914796,\n  'magic board game': 0.49506005899914796,\n  'Robin Williams': 0.4358410782580723,\n  'fantasy': 0.39847806133235253,\n  'Children': 0.250737357659749,\n  'Fantasy': 0.23699778877133218,\n  'Adventure': 0.20506508893148376},\n 3: {'moldy': 0.669101789463952,\n  'old': 0.669101789463952,\n  'Romance': 0.2429576564037987,\n  'Comedy': 0.21348809771752614},\n 4: {},\n 5: {'pregnancy': 0.7029528753875794,\n  'remake': 0.6543780838450272,\n  'Comedy': 0.27865135629966864},\n 6: {},\n 7: {'remake': 0.8403149045689591,\n  'Romance': 0.4072221611696183,\n  'Comedy': 0.35782813278389186},\n 8: {},\n 9: {},\n 10: {},\n 11: {'president': 0.6950047448554714,\n  'politics': 0.5609507121470478,\n  'Romance': 0.2972278571591045,\n  'Comedy': 0.26117559229370996,\n  'Drama': 0.21388224350968954},\n 12: {},\n 13: {},\n 14: {'president': 0.7567630578389799,\n  'politics': 0.6107969468749901,\n  'Drama': 0.23288787855615714},\n 15: {},\n 16: {'Mafia': 0.8386419762344016,\n  'Crime': 0.462290799731766,\n  'Drama': 0.2880396711930868},\n 17: {'Jane Austen': 0.8847129673094921,\n  'Romance': 0.3783590564247974,\n  'Drama': 0.2722634567762824},\n 18: {},\n 19: {},\n 20: {},\n 21: {'Hollywood': 0.804621345353484,\n  'Crime': 0.3832726603372739,\n  'Thriller': 0.34734713549506746,\n  'Comedy': 0.2916102294208992},\n 22: {'serial killer': 0.6388993627076743,\n  'Horror': 0.42025280033920365,\n  'Mystery': 0.3874895270925175,\n  'Crime': 0.3463466639213078,\n  'Thriller': 0.3138823455225707,\n  'Drama': 0.21579832272803287},\n 23: {},\n 24: {},\n 25: {'alcoholism': 0.8790219839011997,\n  'Romance': 0.38699954422500404,\n  'Drama': 0.27848106683945034},\n 26: {'Shakespeare': 0.9414237192194619, 
'Drama': 0.337226008621808},\n 27: {},\n 28: {'Jane Austen': 0.7948154387926983,\n  'In Netflix queue': 0.43920286055948404,\n  'Romance': 0.3399132041299291,\n  'Drama': 0.24459819948491446},\n 29: {'kidnapping': 0.6492035989716409,\n  'Fantasy': 0.3861186658663803,\n  'Mystery': 0.3784163698497391,\n  'Sci-Fi': 0.3608544526872865,\n  'Adventure': 0.3340936595420796,\n  'Drama': 0.21074535489808738},\n 30: {},\n 31: {'teacher': 0.7904827565258387,\n  'high school': 0.5766072429729117,\n  'Drama': 0.20654563415015376},\n 32: {'time travel': 0.6927194923929421,\n  'Bruce Willis': 0.29620258549811723,\n  'Post apocalyptic': 0.27202278707671373,\n  'Brad Pitt': 0.2642386306361509,\n  'post-apocalyptic': 0.2642386306361509,\n  'remake': 0.24005883221474744,\n  'mindfuck': 0.22832130938542955,\n  'twist ending': 0.22591455686322978,\n  'Mystery': 0.15031563352988267,\n  'Sci-Fi': 0.14333963852913362,\n  'Thriller': 0.12176180340947809},\n 34: {'pigs': 0.5826242534552502,\n  'villain nonexistent or not needed for good story': 0.5826242534552502,\n  'Animal movie': 0.4591802696291012,\n  'Children': 0.29508675394898853,\n  'Drama': 0.15223418209659711},\n 36: {'Nun': 0.7208689660808514,\n  'death penalty': 0.5945440137343837,\n  'Crime': 0.3023033027322542,\n  'Drama': 0.18835621208578288},\n 38: {'twins': 0.8024240823781704,\n  'Children': 0.5049153608741369,\n  'Comedy': 0.31808186111894377},\n 39: {'Emma': 0.40942003321437914,\n  'quotable': 0.40942003321437914,\n  'Paul Rudd': 0.3785204133548341,\n  'chick flick': 0.3785204133548341,\n  'seen more than once': 0.3785204133548341,\n  'Jane Austen': 0.3476207934952891,\n  'funny': 0.273699094548059,\n  'Romance': 0.14866457289588528,\n  'Comedy': 0.13063229755880543},\n 40: {'South Africa': 0.8809979180218227,\n  'In Netflix queue': 0.4133428266743388,\n  'Drama': 0.23019638589274474},\n 41: {'Shakespeare': 0.7784156423626852,\n  'War': 0.5624233823119991,\n  'Drama': 0.2788351246773244},\n 42: {},\n 43: {'England': 
0.9442351365080458, 'Drama': 0.3292719347038736},\n 44: {},\n 45: {'Journalism': 0.8722021594547914,\n  'Thriller': 0.33148173308908896,\n  'Comedy': 0.2782906618682024,\n  'Drama': 0.22789813774493745},\n 46: {'wedding': 0.8740750886054338,\n  'Romance': 0.3943125634641287,\n  'Drama': 0.28374344357843895},\n 47: {'mystery': 0.5637350537605167,\n  'serial killer': 0.5287560724841778,\n  'twist ending': 0.4819732623953145,\n  'Mystery': 0.32068812779194505,\n  'Thriller': 0.2597704833156544},\n 48: {},\n 49: {},\n 50: {'tricky': 0.4870917956251266,\n  'thriller': 0.40173391400451847,\n  'heist': 0.3510575678207524,\n  'mindfuck': 0.34712718972702794,\n  'twist ending': 0.3434680952620898,\n  'suspense': 0.3337985194794698,\n  'Mystery': 0.22853164069404985,\n  'Crime': 0.20426660805196142,\n  'Thriller': 0.18511996426175129},\n 52: {'adoption': 0.6933884668165918,\n  'prostitution': 0.5910865218047997,\n  'Romance': 0.2723295670016864,\n  'Comedy': 0.23929734124039978,\n  'Drama': 0.19596567872561146},\n 53: {},\n 54: {},\n 55: {},\n 57: {},\n 58: {'writing': 0.8483274342206527,\n  'Romance': 0.34989008769431595,\n  'Comedy': 0.30745015546219273,\n  'Drama': 0.25177746679983426},\n 60: {},\n 61: {},\n 62: {'music': 0.9388828819020486, 'Drama': 0.3442367413151943},\n 63: {},\n 64: {},\n 65: {},\n 66: {},\n 68: {},\n 69: {},\n 70: {},\n 71: {},\n 72: {},\n 73: {},\n 74: {},\n 75: {},\n 76: {},\n 77: {},\n 78: {},\n 79: {},\n 80: {},\n 81: {},\n 82: {},\n 83: {},\n 85: {},\n 86: {},\n 87: {},\n 88: {},\n 89: {},\n 92: {'Jekyll and Hyde': 0.824327552080362,\n  'Horror': 0.41945558928702786,\n  'Thriller': 0.31328691706919337,\n  'Drama': 0.21538895767970737},\n 93: {},\n 94: {},\n 95: {},\n 96: {'theater': 0.9132602123302859,\n  'Comedy': 0.31517792857821875,\n  'Drama': 0.2581059044492865},\n 97: {},\n 99: {},\n 100: {},\n 101: {'off-beat comedy': 0.5850187220026996,\n  'crime': 0.45146715084800465,\n  'quirky': 0.44422578674503344,\n  'Crime': 0.2786673299483278,\n  
'Adventure': 0.2752538074968491,\n  'Romance': 0.24128920471891763,\n  'Comedy': 0.2120220209467375},\n 102: {},\n 103: {},\n 104: {'test tag': 0.6253540342615053,\n  'golf': 0.550549315443844,\n  'Adam Sandler': 0.5157671019688815,\n  'Comedy': 0.19952964597722403},\n 105: {},\n 106: {},\n 107: {'muppets': 0.7460937765527096,\n  'Musical': 0.38518767312114544,\n  'Children': 0.3778805797369559,\n  'Adventure': 0.30904894034336006,\n  'Comedy': 0.23805367670998334},\n 108: {},\n 110: {'Medieval': 0.32447188109021985,\n  'Oscar (Best Cinematography)': 0.32447188109021985,\n  'Scotland': 0.32447188109021985,\n  'historical': 0.32447188109021985,\n  'mel gibson': 0.32447188109021985,\n  'sword fight': 0.32447188109021985,\n  'beautiful scenery': 0.2999834414257409,\n  'epic': 0.2999834414257409,\n  'inspirational': 0.26761148503331217,\n  'revenge': 0.25100656209678307,\n  'War': 0.17100805699824412,\n  'Action': 0.13143825237929818,\n  'Drama': 0.08478141982276385},\n 111: {'assassination': 0.7843239154955582,\n  'Crime': 0.4173374528900225,\n  'Thriller': 0.37821891253238255,\n  'Drama': 0.2600305755095093},\n 112: {},\n 113: {},\n 116: {'Holocaust': 0.8070811085281034, 'Documentary': 0.5904405848661215},\n 117: {},\n 118: {},\n 119: {},\n 121: {},\n 122: {'dating': 0.8861860775447925,\n  'Romance': 0.34805117525055645,\n  'Comedy': 0.3058342939771164},\n 123: {},\n 125: {},\n 126: {},\n 128: {},\n 129: {},\n 132: {},\n 135: {},\n 137: {},\n 140: {'journalism': 0.8524577937514926,\n  'Romance': 0.42434939893589624,\n  'Drama': 0.30535765504581885},\n 141: {},\n 144: {},\n 145: {},\n 146: {},\n 147: {'Mark Wahlberg': 0.5343864811598665,\n  'addiction': 0.5343864811598665,\n  'heroin': 0.4940554328809621,\n  'Leonardo DiCaprio': 0.4065400828861904,\n  'Drama': 0.1396301104878383},\n 148: {},\n 149: {},\n 150: {'moon': 0.5526854875473075,\n  'NASA': 0.5262936324407403,\n  'space': 0.4360592838319641,\n  'IMAX': 0.37641762355706315,\n  'Adventure': 0.2476234020250595,\n 
 'Drama': 0.15620015600526563},\n 151: {},\n 152: {},\n 153: {'superhero': 0.8611836035896726,\n  'Crime': 0.2721768625336356,\n  'Adventure': 0.2688428447598123,\n  'Action': 0.2629113591134528,\n  'Comedy': 0.2070837957934439},\n 154: {},\n 155: {},\n 156: {},\n 157: {},\n 158: {},\n 159: {},\n 160: {'Michael Crichton': 0.7546434092450265,\n  'Mystery': 0.3540603598801837,\n  'Sci-Fi': 0.33762878026011345,\n  'Adventure': 0.3125903918430547,\n  'Action': 0.30569370309516014},\n 161: {'submarine': 0.7971166424952678,\n  'War': 0.4544029749982827,\n  'Thriller': 0.3276755999773303,\n  'Drama': 0.22528137017800182},\n 162: {'Documentary': 0.7552168710325943,\n  'In Netflix queue': 0.655475001588724},\n 163: {},\n 164: {},\n 165: {},\n 166: {},\n 168: {},\n 169: {},\n 170: {},\n 171: {},\n 172: {},\n 173: {},\n 174: {},\n 175: {'New York City': 0.58487804932181,\n  'Nudity (Full Frontal)': 0.58487804932181,\n  'controversial': 0.5371329112774463,\n  'Drama': 0.16529842850324944},\n 176: {},\n 177: {},\n 178: {},\n 179: {},\n 180: {},\n 181: {},\n 183: {},\n 184: {},\n 185: {'computers': 0.8207773320260541,\n  'Crime': 0.3442008325427274,\n  'Action': 0.33248347361123376,\n  'Thriller': 0.3119376506362998},\n 186: {},\n 187: {},\n 188: {},\n 189: {},\n 190: {},\n 191: {},\n 193: {},\n 194: {},\n 195: {},\n 196: {},\n 198: {},\n 199: {'Made me cry': 0.8257249904745906,\n  'Musical': 0.4262990762213086,\n  'Romance': 0.29982913164898023,\n  'Drama': 0.21575409505549306},\n 201: {},\n 202: {},\n 203: {},\n 204: {},\n 205: {},\n 206: {},\n 207: {},\n 208: {},\n 209: {},\n 210: {},\n 211: {},\n 212: {},\n 213: {},\n 214: {},\n 215: {'generation X': 0.8847129673094921,\n  'Romance': 0.3783590564247974,\n  'Drama': 0.2722634567762824},\n 216: {'stop looking at me swan': 0.6158324914062021,\n  'school': 0.5693545754815452,\n  'Adam Sandler': 0.5079141126289285,\n  'Comedy': 0.19649163875093476},\n 217: {},\n 218: {},\n 219: {},\n 220: {},\n 222: {'Ireland': 
0.8790219839011997,\n  'Romance': 0.38699954422500404,\n  'Drama': 0.27848106683945034},\n 223: {'independent film': 0.468946466753543,\n  'cynical': 0.4335542866411109,\n  'hilarious': 0.41285118845656904,\n  'generation X': 0.3981621065286788,\n  'witty': 0.3774590083441369,\n  'quirky': 0.31349277741558357,\n  'Comedy': 0.1496252000742298},\n 224: {'mental illness': 0.6280058475883457,\n  'psychology': 0.6050827373701024,\n  'Romance': 0.3233899348268009,\n  'Comedy': 0.28416433970050853,\n  'Drama': 0.23270821735993452},\n 225: {},\n 227: {},\n 228: {},\n 229: {},\n 230: {'Stephen King': 0.8452219844931582,\n  'Thriller': 0.44037791932316406,\n  'Drama': 0.3027657264322505},\n 231: {},\n 232: {'In Netflix queue': 0.6493502433442517,\n  'Romance': 0.5025530151067855,\n  'Comedy': 0.4415958269659919,\n  'Drama': 0.36163220830294135},\n 233: {},\n 234: {},\n 235: {'movie business': 0.8824360907735219,\n  'Comedy': 0.363962575230702,\n  'Drama': 0.2980566884533547},\n 236: {},\n 237: {'infertility': 0.5966240583659632,\n  'France': 0.5515958352528207,\n  'basketball': 0.5065676121396783,\n  'Romance': 0.2166402560227005,\n  'Comedy': 0.19036286747208284},\n 238: {},\n 239: {},\n 240: {},\n 241: {},\n 242: {},\n 243: {},\n 246: {'basketball': 0.8435443970895684, 'Documentary': 0.5370594474904958},\n 247: {'Australia': 0.8348296114427604,\n  'Crime': 0.46723497797237373,\n  'Drama': 0.29112024185460766},\n 248: {'Sinbad': 0.7669114821352502,\n  'bad': 0.5932711762773452,\n  'Comedy': 0.24469591326299073},\n 249: {'Beethoven': 0.9128246380527296,\n  'Romance': 0.3314558984315574,\n  'Drama': 0.2385124054610833},\n 250: {},\n 251: {},\n 252: {'Einstein': 0.9003338334747673,\n  'Romance': 0.3269203604093219,\n  'Comedy': 0.28726654217027975},\n 253: {},\n 254: {},\n 255: {},\n 256: {},\n 257: {'court': 0.7703319128251171,\n  'Mystery': 0.49547953374510356,\n  'Thriller': 0.4013586621999467},\n 258: {},\n 259: {},\n 260: {'classic sci-fi': 0.48526540913388666,\n  
'sci-fi': 0.3355669949084688,\n  'EPIC': 0.3235102727559245,\n  'Star Wars': 0.3235102727559245,\n  'space action': 0.3235102727559245,\n  'classic': 0.24240252331342302,\n  'Nerd': 0.16175513637796224,\n  'ROBOTS AND ANDROIDS': 0.16175513637796224,\n  'engrossing adventure': 0.16175513637796224,\n  'oldie but goodie': 0.16175513637796224,\n  'space adventure': 0.16175513637796224,\n  'space epic': 0.16175513637796224,\n  'darth vader': 0.14954720364646648,\n  'luke skywalker': 0.14954720364646648,\n  'great soundtrack': 0.13340919438820725,\n  'space opera': 0.13340919438820725,\n  'space': 0.1179901553242236,\n  'action': 0.1179901553242236,\n  'Sci-Fi': 0.07236953073072853,\n  'Adventure': 0.06700264104022215,\n  'Action': 0.06552436028496039},\n 261: {'Louisa May Alcott': 0.9675177246180529, 'Drama': 0.2528031893586504},\n 262: {'Girl Power': 0.590509648633555,\n  'England': 0.502580677693325,\n  'India': 0.502580677693325,\n  'Children': 0.3397174712460049,\n  'Drama': 0.17525900667164482},\n 263: {},\n 265: {},\n 266: {},\n 267: {},\n 269: {},\n 270: {},\n 271: {},\n 272: {'England': 0.6759590566322982,\n  'mental illness': 0.6361315237931544,\n  'Comedy': 0.28784110070874863,\n  'Drama': 0.23571919509481792},\n 273: {'gothic': 0.7424765248014012,\n  'Horror': 0.46937761116413423,\n  'Sci-Fi': 0.41269943730252157,\n  'Drama': 0.24102373888658166},\n 274: {},\n 275: {},\n 276: {},\n 277: {'Christmas': 0.9474159668954016, 'Drama': 0.3200046650779502},\n 278: {},\n 279: {'In Netflix queue': 0.873652990606142, 'Drama': 0.4865495370514129},\n 280: {'prison': 0.8787273366828048,\n  'Thriller': 0.3933325820587667,\n  'Drama': 0.270421426032273},\n 281: {},\n 282: {'twins': 0.6960391940175992,\n  'disability': 0.6815250576352972,\n  'Drama': 0.22594918943544756},\n 283: {},\n 284: {},\n 285: {},\n 287: {},\n 288: {'brutality': 0.4219341463631337,\n  'psychedelic': 0.4219341463631337,\n  'controversial': 0.40692189638688225,\n  'stylized': 0.3646047404536579,\n  
'satire': 0.3495924904774065,\n  'dark comedy': 0.33458024050115504,\n  'Crime': 0.20098375925223613,\n  'Action': 0.19414182679913258,\n  'Thriller': 0.1821448286863503},\n 289: {},\n 290: {'In Netflix queue': 0.6885800071289578,\n  'Crime': 0.6154679588413323,\n  'Drama': 0.3834798109704973},\n 291: {},\n 292: {},\n 293: {'Jean Reno': 0.35027387728481135,\n  'assassin': 0.30837418932580585,\n  'hit men': 0.29740235768741874,\n  'French': 0.17513693864240568,\n  'Gary Oldman': 0.17513693864240568,\n  'Guns': 0.17513693864240568,\n  'Lolita theme': 0.17513693864240568,\n  'Luc Besson': 0.17513693864240568,\n  'Natalie Portman': 0.17513693864240568,\n  'assassins': 0.17513693864240568,\n  'awkward romance': 0.17513693864240568,\n  'police corruption': 0.17513693864240568,\n  'sniper': 0.1619190587430575,\n  'drama': 0.15418709466290292,\n  'great acting': 0.15418709466290292,\n  'hitman': 0.15418709466290292,\n  'love story': 0.15418709466290292,\n  'loneliness': 0.14870117884370937,\n  'unique': 0.14870117884370937,\n  'corruption': 0.14444597194926226,\n  'friendship': 0.14444597194926226,\n  'humorous': 0.14444597194926226,\n  'organized crime': 0.14444597194926226,\n  'Action': 0.14189021905301874,\n  'touching': 0.14096921476355478,\n  'assassination': 0.13802965844780438,\n  'police': 0.13802965844780438,\n  'tense': 0.13323725068340017,\n  'imdb top 250': 0.1294105869872503,\n  'disturbing': 0.12775133486420662},\n 294: {},\n 295: {},\n 296: {'non-linear': 0.15400541621333916,\n  'Tarantino': 0.14238236792246217,\n  'hit men': 0.13075931963158519,\n  'good dialogue': 0.13075931963158519,\n  'great soundtrack': 0.12701753383851935,\n  'Quentin Tarantino': 0.12701753383851935,\n  'cult film': 0.12396027223735102,\n  'drugs': 0.11716122484311686,\n  '1990s': 0.07700270810666958,\n  'AWESOME': 0.07700270810666958,\n  'Black comedy': 0.07700270810666958,\n  'Harvey Keitel': 0.07700270810666958,\n  'John Travolta': 0.07700270810666958,\n  \"Palme d'Or\": 
0.07700270810666958,\n  'Quotable': 0.07700270810666958,\n  'Roger Avary': 0.07700270810666958,\n  'Uma Thurman': 0.07700270810666958,\n  'achronological': 0.07700270810666958,\n  'action packed': 0.07700270810666958,\n  'aggressive': 0.07700270810666958,\n  'amazing': 0.07700270810666958,\n  'amazing dialogues': 0.07700270810666958,\n  'anthology': 0.07700270810666958,\n  'bad ass': 0.07700270810666958,\n  'bad language': 0.07700270810666958,\n  'bad-ass': 0.07700270810666958,\n  'bible': 0.07700270810666958,\n  'biblical references': 0.07700270810666958,\n  'big boys with guns': 0.07700270810666958,\n  'big name actors': 0.07700270810666958},\n 298: {},\n 299: {},\n 300: {'TV': 0.96230647251818, 'Drama': 0.27196737478899424},\n 301: {},\n 302: {},\n 303: {},\n 304: {},\n 305: {},\n 306: {},\n 307: {'Death': 0.9675177246180529, 'Drama': 0.2528031893586504},\n 308: {'cynical': 0.7080282867057177,\n  'marriage': 0.631623200101905,\n  'Comedy': 0.2443497327112155,\n  'Drama': 0.20010318948370734},\n 310: {},\n 311: {},\n 312: {},\n 313: {},\n 314: {},\n 315: {},\n 316: {'time travel': 0.7015900951909583,\n  'Sci-Fi': 0.43552551246780985,\n  'Adventure': 0.40322714934157894,\n  'Action': 0.3943307398028433},\n 317: {'Christmas': 0.7744808060462135,\n  'Fantasy': 0.4792797223240379,\n  'Comedy': 0.3194361966635327,\n  'Drama': 0.2615930906888354},\n 318: {'wrongful imprisonment': 0.5462065041819145,\n  'Morgan Freeman': 0.5049833788464848,\n  'prison': 0.4637602535110551,\n  'Stephen King': 0.3984231456918708,\n  'Crime': 0.2290569270664261,\n  'Drama': 0.14271857020514842},\n 319: {},\n 320: {},\n 321: {},\n 322: {},\n 324: {},\n 325: {},\n 326: {'In Netflix queue': 0.873652990606142, 'Drama': 0.4865495370514129},\n 327: {},\n 328: {},\n 329: {'Enterprise': 0.8333271263122062,\n  'Sci-Fi': 0.37283201279917616,\n  'Adventure': 0.34518296954051886,\n  'Drama': 0.2177404609243412},\n 330: {},\n 331: {},\n 332: {},\n 333: {},\n 334: {},\n 335: {},\n 336: {},\n 337: 
{'mental illness': 0.9376935735002573, 'Drama': 0.34746332499462074},\n 338: {'serial killer': 0.7352041281143276,\n  'Sci-Fi': 0.42520425750513396,\n  'Action': 0.38498573477187725,\n  'Thriller': 0.3611955335068763},\n 339: {'coma': 0.8861860775447925,\n  'Romance': 0.34805117525055645,\n  'Comedy': 0.3058342939771164},\n 340: {},\n 341: {},\n 342: {'weddings': 0.7503879422754207,\n  'Australia': 0.6081555730775681,\n  'Comedy': 0.25896859852568},\n 343: {},\n 344: {},\n 345: {'cross dressing': 0.5629977266891043,\n  'men in drag': 0.5494465949725973,\n  'remade': 0.5494465949725973,\n  'Comedy': 0.21780128407460264,\n  'Drama': 0.1783621006390999},\n 346: {},\n 347: {},\n 348: {},\n 349: {'Tom Clancy': 0.7793066941833574,\n  'Crime': 0.3534880633491122,\n  'Action': 0.3414545465047102,\n  'Thriller': 0.32035435589892036,\n  'Drama': 0.22024791667244362},\n 350: {'John Grisham': 0.790466824403122,\n  'Mystery': 0.4367998827282114,\n  'Thriller': 0.3538257478684831,\n  'Drama': 0.24325994761154843},\n 351: {'interracial romance': 0.8764092888721928,\n  'Romance': 0.3182331152416974,\n  'Comedy': 0.2796330167539845,\n  'Drama': 0.22899742069108042},\n 352: {},\n 353: {},\n 354: {},\n 355: {},\n 356: {'bubba gump shrimp': 0.3654986417153648,\n  'lieutenant dan': 0.3654986417153648,\n  'shrimp': 0.3654986417153648,\n  'stupid is as stupid does': 0.3654986417153648,\n  'touching': 0.2941929721916777,\n  'heartwarming': 0.28274424643443424,\n  'bittersweet': 0.2780568995219443,\n  'Vietnam': 0.2738639248289857,\n  'emotional': 0.26342275799926373,\n  'War': 0.19263059820540443,\n  'Romance': 0.13271626949477897,\n  'Comedy': 0.11661844425893099,\n  'Drama': 0.09550132259166141},\n 357: {'wedding': 0.8572903009918638,\n  'Romance': 0.3867406137341949,\n  'Comedy': 0.33983089546681566},\n 358: {},\n 359: {},\n 360: {},\n 361: {'gambling': 0.832219238252512,\n  'Romance': 0.3663940968457076,\n  'Comedy': 0.32195231016106457,\n  'Drama': 0.2636535895090006},\n 362: {},\n 
363: {'Holocaust': 0.7182597416492331,\n  'Documentary': 0.5254610688615756,\n  'In Netflix queue': 0.4560631629904211},\n 364: {'Disney': 0.5459840333382661,\n  'Disney animated feature': 0.4114800943322419,\n  'Oscar (Best Music - Original Score)': 0.4114800943322419,\n  'soundtrack': 0.34936990204480967,\n  'IMAX': 0.2590961351189228,\n  'Musical': 0.21243584258782286,\n  'Animation': 0.2119118245299939,\n  'Children': 0.20840588875425353,\n  'Adventure': 0.17044437458430017,\n  'Drama': 0.1075158393050158},\n 365: {},\n 366: {},\n 367: {},\n 368: {},\n 369: {},\n 370: {},\n 371: {'journalism': 0.8705052344183988,\n  'Comedy': 0.38077214484325994,\n  'Drama': 0.3118224022767317},\n 372: {},\n 373: {},\n 374: {},\n 376: {},\n 377: {'bus': 0.833223145726625,\n  'Action': 0.33752506919339925,\n  'Thriller': 0.31666770071751954,\n  'Romance': 0.3025517879862852},\n 378: {},\n 379: {},\n 380: {'spies': 0.7638786735631683,\n  'Adventure': 0.31641584749086893,\n  'Action': 0.3094347576301753,\n  'Thriller': 0.29031323052535746,\n  'Romance': 0.27737210575155463,\n  'Comedy': 0.2437282450501858},\n 381: {'alcoholism': 0.8790219839011997,\n  'Romance': 0.38699954422500404,\n  'Drama': 0.27848106683945034},\n 382: {},\n 383: {},\n 384: {},\n 385: {},\n 386: {},\n 387: {},\n 388: {},\n 389: {},\n 390: {},\n 391: {},\n 393: {},\n 405: {},\n 406: {},\n 407: {},\n 408: {},\n 409: {},\n 410: {'Christina Ricci': 0.43174619916107754,\n  'Christopher Lloyd': 0.43174619916107754,\n  'gothic': 0.37588485966600893,\n  'family': 0.36804673192244836,\n  'black comedy': 0.34064027850421374,\n  'dark comedy': 0.32601245567569204,\n  'Children': 0.2365208668749077,\n  'Fantasy': 0.2235603141503085,\n  'Comedy': 0.149001205665023},\n 412: {'Edith Wharton': 0.96230647251818, 'Drama': 0.27196737478899424},\n 413: {'Comedy': 1.0},\n 414: {},\n 415: {},\n 416: {},\n 417: {},\n 418: {},\n 419: {},\n 420: {},\n 421: {'horses': 0.7695609689866203,\n  'Children': 0.459057969085359,\n  
'Adventure': 0.37543971960867195,\n  'Drama': 0.23682633504693146},\n 422: {},\n 423: {},\n 424: {},\n 425: {'Oscar (Best Actress)': 0.7129540032501256,\n  'mental illness': 0.5921073357089567,\n  'Romance': 0.30490409196142604,\n  'Drama': 0.21940598659662647},\n 426: {},\n 427: {},\n 428: {},\n 429: {},\n 430: {},\n 431: {'gangster': 0.5784874200411962,\n  'mafia': 0.5508634702276193,\n  'Al Pacino': 0.5160614093049406,\n  'Crime': 0.2623978458140567,\n  'Drama': 0.16349230673400678},\n 432: {},\n 433: {},\n 434: {},\n 435: {},\n 436: {},\n 437: {},\n 438: {},\n 440: {'President': 0.9003338334747673,\n  'Romance': 0.3269203604093219,\n  'Comedy': 0.28726654217027975},\n 441: {},\n 442: {},\n 444: {},\n 445: {},\n 446: {},\n 448: {},\n 449: {},\n 450: {},\n 451: {},\n 452: {},\n 453: {},\n 454: {'John Grisham': 0.8787273366828048,\n  'Thriller': 0.3933325820587667,\n  'Drama': 0.270421426032273},\n 455: {},\n 456: {},\n 457: {'based on a TV show': 0.9127339331088291,\n  'Thriller': 0.40855448516897674},\n 458: {},\n 459: {},\n 460: {},\n 461: {},\n 464: {},\n 466: {},\n 467: {},\n 468: {},\n 469: {},\n 470: {},\n 471: {'hula hoop': 0.9526819018195799, 'Comedy': 0.30396906741546625},\n 472: {},\n 473: {},\n 474: {'assassination': 0.8173910303074021,\n  'Action': 0.42012634692618434,\n  'Thriller': 0.39416463082253184},\n 475: {'Ireland': 0.9533034808868223, 'Drama': 0.30201402836469093},\n 476: {},\n 477: {'biopic': 0.8186273438280529,\n  'Musical': 0.5124335171966204,\n  'Drama': 0.2593475706277732},\n 478: {},\n 479: {},\n 480: {'Dinosaur': 0.7714523215753393,\n  'Sci-Fi': 0.34514911701527085,\n  'Adventure': 0.31955302403120917,\n  'Action': 0.3125027185749294,\n  'Thriller': 0.2931916067614935},\n 481: {},\n 482: {},\n 484: {},\n 485: {},\n 486: {},\n 487: {},\n 488: {'Japan': 0.706239670159516,\n  'sexuality': 0.6102808550957471,\n  'Romance': 0.2912864186130337,\n  'Drama': 0.20960684275131333},\n 489: {},\n 490: {},\n 491: {},\n 492: {},\n 493: {},\n 494: 
{},\n 495: {},\n 496: {},\n 497: {'Shakespeare': 0.8335845919591379,\n  'Romance': 0.41495440965280656,\n  'Comedy': 0.36462249787692486},\n 499: {},\n 500: {'cross dressing': 0.5695975695473002,\n  'men in drag': 0.5558875822339765,\n  'divorce': 0.534254827265556,\n  'Comedy': 0.2203544990896243,\n  'Drama': 0.18045298268048662},\n 501: {},\n 502: {},\n 504: {},\n 505: {},\n 506: {},\n 507: {},\n 508: {'AIDs': 0.96230647251818, 'Drama': 0.27196737478899424},\n 509: {},\n 510: {},\n 511: {},\n 512: {},\n 513: {'radio': 0.6489830818885371,\n  'show business': 0.5960048809299715,\n  'Mystery': 0.3293431856450533,\n  'Romance': 0.2548892722337046,\n  'Comedy': 0.2239724677263863},\n 514: {},\n 515: {'Butler': 0.6741812136767888,\n  'Housekeeper': 0.6741812136767888,\n  'Romance': 0.24480204693161395,\n  'Drama': 0.17615714594837087},\n 516: {'military': 0.8791348760622193,\n  'Comedy': 0.36871353207350627,\n  'Drama': 0.3019473479879331},\n 517: {},\n 518: {},\n 519: {},\n 520: {},\n 521: {},\n 522: {'violence': 0.5581684854939957,\n  'racism': 0.5387888027953046,\n  'Australia': 0.5306641066670867,\n  'Action': 0.28688996830772995,\n  'Drama': 0.18505220820988325},\n 523: {},\n 524: {'football': 0.9586683655604169, 'Drama': 0.28452585976272676},\n 526: {},\n 527: {'biography': 0.43475861057984233,\n  'holocaust': 0.40194664558606563,\n  'based on a true story': 0.382752876489731,\n  'moving': 0.382752876489731,\n  'Holocaust': 0.3212478614107512,\n  'disturbing': 0.31712894650217766,\n  'thought-provoking': 0.29537576171263064,\n  'War': 0.22913302998309043,\n  'Drama': 0.11359829443859355},\n 528: {},\n 529: {'chess': 0.96230647251818, 'Drama': 0.27196737478899424},\n 531: {'Children': 0.6861121265626725,\n  'In Netflix queue': 0.635578887787881,\n  'Drama': 0.3539627482970026},\n 532: {},\n 533: {},\n 534: {'C.S. 
Lewis': 0.9001603041299965,\n  'Romance': 0.3535395778664867,\n  'Drama': 0.2544036040440045},\n 535: {'large cast': 0.9675177246180529, 'Drama': 0.2528031893586504},\n 536: {},\n 537: {},\n 538: {'race': 0.9474159668954016, 'Drama': 0.3200046650779502},\n 539: {'Empire State Building': 0.8764092888721928,\n  'Romance': 0.3182331152416974,\n  'Comedy': 0.2796330167539845,\n  'Drama': 0.22899742069108042},\n 540: {'bad': 0.8975337311488002, 'Thriller': 0.44094580330252975},\n 541: {'atmospheric': 0.402023117887796,\n  'Philip K. Dick': 0.32283671269413783,\n  'androids': 0.32283671269413783,\n  'cyberpunk': 0.29847168194408985,\n  'artificial intelligence': 0.26626286326280696,\n  'existentialism': 0.26626286326280696,\n  'future': 0.259854021876346,\n  'robots': 0.24560139255865004,\n  'philosophical': 0.241897832512759,\n  'dreamlike': 0.2385475549283519,\n  'mindfuck': 0.23007039294185783,\n  'sci-fi': 0.22324555487739484,\n  'Sci-Fi': 0.14443770951256713,\n  'Action': 0.1307758724047847,\n  'Thriller': 0.12269457472512017},\n 542: {},\n 543: {'beat poetry': 0.8518457175636112,\n  'Thriller': 0.3237452369517327,\n  'Romance': 0.309313832985971,\n  'Comedy': 0.27179559919750307},\n 544: {},\n 546: {},\n 547: {},\n 548: {},\n 549: {},\n 550: {},\n 551: {'Halloween': 0.61828680133177,\n  'Christmas': 0.4782973608395742,\n  'Musical': 0.31920445098321404,\n  'Animation': 0.31841706550994847,\n  'Children': 0.31314907357956145,\n  'Fantasy': 0.2959895513250881},\n 552: {'knights': 0.7982583234893572,\n  'Adventure': 0.330656677146568,\n  'Action': 0.32336139154534616,\n  'Romance': 0.2898557058637912,\n  'Comedy': 0.25469764638569364},\n 553: {},\n 555: {},\n 556: {'politics': 0.7851326090915529, 'Documentary': 0.6193276888215888},\n 558: {},\n 562: {'adolescence': 0.8732038924640715,\n  'Comedy': 0.37705533151507487,\n  'Drama': 0.30877862484578034},\n 563: {},\n 564: {},\n 567: {},\n 568: {},\n 569: {},\n 573: {},\n 574: {},\n 575: {},\n 577: {},\n 579: {},\n 580: 
{},\n 581: {},\n 583: {},\n 585: {},\n 586: {'christmas': 0.8172997991949558,\n  'Children': 0.4875351025746099,\n  'Comedy': 0.3071328083963834},\n 587: {'overrated': 0.7309618135497714,\n  'Fantasy': 0.3974759198466398,\n  'Thriller': 0.31554903876089835,\n  'Romance': 0.3014829919759529,\n  'Comedy': 0.2649146003621397,\n  'Drama': 0.21694419668515033},\n 588: {'Disney': 0.5413679719752593,\n  'Musical': 0.4212795768896516,\n  'Animation': 0.4202404014708733,\n  'Children': 0.4132878122928225,\n  'Adventure': 0.33800668066835543,\n  'Comedy': 0.2603591942306689},\n 589: {'Scifi masterpiece': 0.3877789485554038,\n  'Suspense': 0.3877789485554038,\n  'nuclear war': 0.3877789485554038,\n  'Arnold Schwarzenegger': 0.3585126178245332,\n  'apocalypse': 0.3585126178245332,\n  'robots': 0.29500687507113765,\n  'time travel': 0.27948065591463706,\n  'sci-fi': 0.26815390919322735,\n  'Sci-Fi': 0.17349297934277708,\n  'Action': 0.15708291003938193},\n 590: {'American Indians': 0.6129400730679831,\n  'Native Americans': 0.6129400730679831,\n  'Western': 0.3981170492873505,\n  'Adventure': 0.2538936605943653,\n  'Drama': 0.16015541774026032},\n 592: {'superhero': 0.6899805975877804,\n  'Crime': 0.4361363906087659,\n  'Action': 0.4212893415935239,\n  'Thriller': 0.39525575821088627},\n 593: {'Hannibal Lector': 0.4732242466675318,\n  'drama': 0.41661726123172466,\n  'gothic': 0.38090222986028777,\n  'disturbing': 0.3451871984888508,\n  'suspense': 0.32429524442448054,\n  'psychology': 0.3215093822613042,\n  'Horror': 0.24079815693396642,\n  'Crime': 0.1984511186247027,\n  'Thriller': 0.1798495815731384},\n 594: {'Disney': 0.5370816940469133,\n  'Musical': 0.4179440981661936,\n  'Animation': 0.41691315041305455,\n  'Children': 0.4100156083214245,\n  'Fantasy': 0.38754812382514553,\n  'Drama': 0.21152555966792394},\n 595: {'Disney': 0.47077970136151825,\n  'IMAX': 0.44681600071494665,\n  'Musical': 0.36634947700024023,\n  'Animation': 0.3654457983220767,\n  'Children': 
0.35939974826671495,\n  'Fantasy': 0.3397058436731638,\n  'Romance': 0.25766475156989704},\n 596: {'Disney': 0.5495158832918604,\n  'Musical': 0.42762008613601127,\n  'Animation': 0.42656527050652066,\n  'Children': 0.41950804070882475,\n  'Fantasy': 0.3965204026545654},\n 597: {'prostitution': 0.8524410626162622,\n  'Romance': 0.39274268133863427,\n  'Comedy': 0.34510494204024034},\n 599: {},\n 600: {},\n 602: {},\n 605: {},\n 606: {},\n 608: {'KIDNAPPING': 0.4909387601634997,\n  'Steve Buscemi': 0.43221276832738614,\n  'based on a true story': 0.43221276832738614,\n  'Coen Brothers': 0.38692071421047886,\n  'dark comedy': 0.34273085793390545,\n  'Crime': 0.2058798694630701,\n  'Thriller': 0.18658200887891813,\n  'Comedy': 0.15664220848530996,\n  'Drama': 0.12827763377472962},\n 609: {},\n 610: {},\n 611: {},\n 612: {},\n 613: {},\n 615: {},\n 616: {'Disney': 0.6764520682130168,\n  'Animation': 0.5251003078080728,\n  'Children': 0.516412883408421},\n 617: {},\n 618: {},\n 619: {},\n 626: {},\n 627: {},\n 628: {'edward norton': 0.4550111213322896,\n  'priest': 0.4332834156902077,\n  'twist ending': 0.3470382801377636,\n  'suspense': 0.3372681937875261,\n  'psychology': 0.33437088734819503,\n  'thought-provoking': 0.33437088734819503,\n  'Mystery': 0.2309071166653951,\n  'Crime': 0.20638986073461835,\n  'Thriller': 0.18704419683446885,\n  'Drama': 0.1285953941936117},\n 631: {},\n 632: {},\n 633: {},\n 634: {},\n 635: {},\n 636: {},\n 637: {},\n 638: {'babies': 0.9307848602476919, 'Romance': 0.36556742734232295},\n 639: {},\n 640: {},\n 645: {},\n 647: {'Gulf War': 0.7701441160053548,\n  'War': 0.4058929496269245,\n  'Crime': 0.32296730862751377,\n  'Action': 0.31197278589387895,\n  'Drama': 0.20123134061322948},\n 648: {'based on a TV show': 0.712227834446515,\n  'Mystery': 0.39356621297410777,\n  'Adventure': 0.3474690495467946,\n  'Action': 0.33980283220044005,\n  'Thriller': 0.31880470931348176},\n 649: {},\n 650: {},\n 653: {},\n 656: {},\n 661: {},\n 662: 
{},\n 663: {},\n 665: {},\n 667: {},\n 668: {'India': 0.9442351365080458, 'Drama': 0.3292719347038736},\n 670: {'India': 0.9442351365080458, 'Drama': 0.3292719347038736},\n 671: {'spoof': 0.8152464717634448,\n  'Sci-Fi': 0.4714966326412556,\n  'Comedy': 0.3362500790858071},\n 673: {'Bugs Bunny': 0.6704806597603965,\n  'Animation': 0.34529684881227235,\n  'Children': 0.33958414930536013,\n  'Fantasy': 0.32097607328372935,\n  'Sci-Fi': 0.2999742190412441,\n  'Adventure': 0.2777282748250193,\n  'Comedy': 0.21392804930818468},\n 674: {},\n 678: {},\n 679: {},\n 680: {},\n 685: {},\n 688: {},\n 691: {},\n 692: {},\n 694: {},\n 695: {},\n 697: {},\n 698: {},\n 700: {},\n 703: {},\n 704: {},\n 706: {},\n 707: {'bad': 0.7786596614467045,\n  'Crime': 0.4221105729848017,\n  'Thriller': 0.38254463091479096,\n  'Drama': 0.263004564919342},\n 708: {'Veterinarian': 0.9003338334747673,\n  'Romance': 0.3269203604093219,\n  'Comedy': 0.28726654217027975},\n 709: {},\n 710: {},\n 711: {},\n 714: {},\n 715: {},\n 718: {},\n 719: {},\n 720: {'Aardman': 0.7565483690252547,\n  'Animation': 0.4588876840477525,\n  'Adventure': 0.3690913637567526,\n  'Comedy': 0.2843030494994694},\n 722: {},\n 724: {},\n 725: {},\n 726: {},\n 728: {'In Netflix queue': 0.8269042008245158, 'Comedy': 0.5623428159572849},\n 731: {},\n 733: {'Alcatraz': 0.575903282410993,\n  'Michael Bay': 0.5324388911777335,\n  'terrorism': 0.47498209127931174,\n  'Adventure': 0.2385521830670386,\n  'Action': 0.23328900096139615,\n  'Thriller': 0.2188729024296642},\n 735: {},\n 736: {'Disaster': 0.7876308100821922,\n  'Adventure': 0.32625452039337177,\n  'Action': 0.31905635967422713,\n  'Thriller': 0.2993402654765393,\n  'Romance': 0.2859967477927429},\n 737: {},\n 741: {},\n 742: {},\n 743: {},\n 745: {'Aardman': 0.7322596265726776,\n  'Animation': 0.444155242304661,\n  'Children': 0.43680699848942195,\n  'Comedy': 0.27517559138774045},\n 747: {},\n 748: {'aliens': 0.7068192317936239,\n  'Sci-Fi': 0.4437396293120744,\n  
'Action': 0.40176791323884314,\n  'Thriller': 0.37694065691612466},\n 750: {'dark comedy': 0.619577826999807,\n  'black comedy': 0.4315850716129275,\n  'Atomic bomb': 0.2958344360409081,\n  'Quirky': 0.2958344360409081,\n  'Slim Pickens': 0.2958344360409081,\n  'purity of essence': 0.2958344360409081,\n  'satire': 0.21579253580646374,\n  'War': 0.15591511945671613,\n  'Comedy': 0.0943909162764438},\n 757: {},\n 759: {},\n 760: {},\n 761: {},\n 762: {},\n 764: {},\n 765: {},\n 766: {},\n 773: {},\n 775: {},\n 778: {'drug abuse': 0.514976816223914,\n  'narrated': 0.514976816223914,\n  'based on a book': 0.4594043920971674,\n  'dark comedy': 0.38886006825174424,\n  'Crime': 0.23358988033259803,\n  'Comedy': 0.17772516968532856,\n  'Drama': 0.1455429187949957},\n 779: {},\n 780: {'aliens': 0.6537947219562973,\n  'Sci-Fi': 0.41045095339424126,\n  'Adventure': 0.3800121075431277,\n  'Action': 0.3716278919864568,\n  'Thriller': 0.3486631388864891},\n 781: {},\n 782: {},\n 783: {},\n 784: {},\n 785: {},\n 786: {},\n 788: {},\n 790: {},\n 791: {},\n 795: {},\n 798: {},\n 799: {},\n 800: {'Western': 0.6733760280055606,\n  'Mystery': 0.4864076691416888,\n  'In Netflix queue': 0.4864076691416888,\n  'Drama': 0.2708872158440726},\n 801: {},\n 802: {},\n 803: {},\n 804: {},\n 805: {'John Grisham': 0.8787273366828048,\n  'Thriller': 0.3933325820587667,\n  'Drama': 0.270421426032273},\n 806: {},\n 808: {},\n 809: {},\n 810: {},\n 813: {},\n 818: {'based on a TV show': 0.9360859340412244,\n  'Comedy': 0.35177140885832164},\n 823: {},\n 824: {},\n 828: {},\n 829: {},\n 830: {'adultery': 0.9180661745326452, 'Comedy': 0.3964271675591805},\n 832: {'GIVE ME BACK MY SON!': 0.5020007374810637,\n  'It was melodramatic and kind of dumb': 0.5020007374810637,\n  'Mel Gibson': 0.5020007374810637,\n  'kidnapping': 0.4040646726041168,\n  'Crime': 0.21051881556173432,\n  'Thriller': 0.19078612987640617},\n 833: {},\n 835: {},\n 836: {'seen at the cinema': 0.8219637525067822,\n  'Adventure': 
0.34047600274404183,\n  'Action': 0.3329640731443889,\n  'Thriller': 0.31238855151158545},\n 837: {},\n 838: {'Jane Austen': 0.8395306108294729,\n  'Romance': 0.3590362315126487,\n  'Comedy': 0.31548691739893747,\n  'Drama': 0.25835894196176745},\n 839: {},\n 840: {},\n 841: {},\n 842: {},\n 848: {},\n 849: {},\n 851: {},\n 852: {'golf': 0.8483274342206527,\n  'Romance': 0.34989008769431595,\n  'Comedy': 0.30745015546219273,\n  'Drama': 0.25177746679983426},\n 858: {'Mafia': 0.8386419762344016,\n  'Crime': 0.462290799731766,\n  'Drama': 0.2880396711930868},\n 861: {},\n 866: {},\n 867: {},\n 869: {},\n 870: {},\n 875: {},\n 876: {},\n 879: {},\n 880: {},\n 881: {},\n 882: {},\n 885: {},\n 886: {},\n 888: {},\n 889: {},\n 891: {},\n 892: {'Shakespeare': 0.798736976170971,\n  'Romance': 0.39760743374097796,\n  'Comedy': 0.3493796241046615,\n  'Drama': 0.28611439983274684},\n 893: {},\n 896: {},\n 897: {},\n 898: {'divorce': 0.8152675744036024,\n  'Romance': 0.3826754124243291,\n  'Comedy': 0.3362587829130148,\n  'Drama': 0.2753694641128429},\n 899: {'movie business': 0.738046241512493,\n  'Musical': 0.49255476741223,\n  'Romance': 0.34642877838681185,\n  'Comedy': 0.30440868580608577},\n 900: {'France': 0.8258869797069524,\n  'Musical': 0.46118945193347277,\n  'Romance': 0.32436859616157526},\n 901: {},\n 902: {'Capote': 0.9001603041299965,\n  'Romance': 0.3535395778664867,\n  'Drama': 0.2544036040440045},\n 903: {'falling': 0.47561236917995814,\n  'James Stewart': 0.43971710217805476,\n  'Alfred Hitchcock': 0.41871971702856775,\n  'Atmospheric': 0.3922661402547555,\n  'imdb top 250': 0.3514351475541456,\n  'Mystery': 0.2231457726024427,\n  'Thriller': 0.18075719109997446,\n  'Romance': 0.17269968245817463,\n  'Drama': 0.12427299342198928},\n 904: {'photographer': 0.4376627259563493,\n  'voyeurism': 0.4376627259563493,\n  'James Stewart': 0.4046315824811051,\n  'photography': 0.3853096021921469,\n  'mystery': 0.36096678591492437,\n  'imdb top 250': 
0.3233937438183444,\n  'Mystery': 0.20534072167048284,\n  'Thriller': 0.16633437252573713},\n 905: {'Screwball': 0.8765658673280525,\n  'Romance': 0.36153694412938475,\n  'Comedy': 0.31768430597844327},\n 906: {'Brooch': 0.9080738870632128,\n  'Thriller': 0.34511483673099247,\n  'Drama': 0.23727107936292552},\n 907: {'divorce': 0.738046241512493,\n  'Musical': 0.49255476741223,\n  'Romance': 0.34642877838681185,\n  'Comedy': 0.30440868580608577},\n 908: {'Mount Rushmore': 0.5631550453178534,\n  'Alfred Hitchcock': 0.4957905565519056,\n  'imdb top 250': 0.4161213821843581,\n  'Mystery': 0.2642186701306495,\n  'Adventure': 0.23327157453136033,\n  'Action': 0.2281248986089544,\n  'Thriller': 0.2140279158865146,\n  'Romance': 0.2044873174110261},\n 909: {'adultery': 0.8024463623483842,\n  'Romance': 0.39433230549580106,\n  'Comedy': 0.34650175267145317,\n  'Drama': 0.28375764975031625},\n 910: {'men in drag': 0.8366581623702138,\n  'Crime': 0.4359012240221272,\n  'Comedy': 0.3316522911654738},\n 911: {'heist': 0.633696082027167,\n  'Mystery': 0.4125238097729987,\n  'Crime': 0.36872285652477255,\n  'Thriller': 0.33416113712033274,\n  'Romance': 0.31926542960399085,\n  'Comedy': 0.2805401165042645},\n 912: {'start of a beautiful friendship': 0.9128246380527296,\n  'Romance': 0.3314558984315574,\n  'Drama': 0.2385124054610833},\n 913: {'statue': 0.7916207182107255,\n  'Film-Noir': 0.48517196267841095,\n  'Mystery': 0.37140921519304476},\n 914: {'George Bernard Shaw': 0.7749700882318534,\n  'Musical': 0.4327565866013553,\n  'Romance': 0.3043708954900507,\n  'Comedy': 0.2674522155035692,\n  'Drama': 0.21902230365849543},\n 915: {'rich guy - poor girl': 0.9003338334747673,\n  'Romance': 0.3269203604093219,\n  'Comedy': 0.28726654217027975},\n 916: {'royalty': 0.6809376673211157,\n  'Italy': 0.6295461540552015,\n  'Romance': 0.2472553838810513,\n  'Comedy': 0.2172645321679066,\n  'Drama': 0.17792254309467534},\n 917: {},\n 918: {'1900s': 0.8885687079611593, 'Musical': 
0.4587435571561042},\n 919: {'Dorothy': 0.5848236987376231,\n  'Toto': 0.5848236987376231,\n  'Musical': 0.30192837252132243,\n  'Children': 0.2962007260588257,\n  'Fantasy': 0.27996991658365017,\n  'Adventure': 0.24224722154585357},\n 920: {'Civil War': 0.7663928799651001,\n  'War': 0.489737416478243,\n  'Romance': 0.3374132850778832,\n  'Drama': 0.24279928231559283},\n 921: {'television': 0.9296258483367599, 'Comedy': 0.36850479251179297},\n 922: {'eerie': 0.5575285412502868,\n  'movies': 0.5120160156963979,\n  'movie business': 0.4665034901425088,\n  'Film-Noir': 0.3695944424223391,\n  'Romance': 0.2189703369697573,\n  'Drama': 0.15756890145089758},\n 923: {'Rosebud': 0.8809979180218227,\n  'Mystery': 0.4133428266743388,\n  'Drama': 0.23019638589274474},\n 924: {'space': 0.25194089687767923,\n  'Arthur C. Clarke': 0.17269548472773522,\n  'Dull': 0.17269548472773522,\n  'Hal': 0.17269548472773522,\n  'Oscar (Best Effects - Visual Effects)': 0.17269548472773522,\n  'apes': 0.17269548472773522,\n  'computer': 0.17269548472773522,\n  'confusing ending': 0.17269548472773522,\n  'relaxing': 0.17269548472773522,\n  'revolutionary': 0.17269548472773522,\n  'setting:space/space ship': 0.17269548472773522,\n  'slow': 0.17269548472773522,\n  'slow paced': 0.17269548472773522,\n  'spacecraft': 0.17269548472773522,\n  'superb soundtrack': 0.17269548472773522,\n  'technology': 0.17269548472773522,\n  'tedious': 0.17269548472773522,\n  'visual': 0.17269548472773522,\n  'Stanley Kubrick': 0.15966186546965475,\n  'futuristic': 0.15966186546965475,\n  'meditative': 0.15966186546965475,\n  'space travel': 0.15966186546965475,\n  'masterpiece': 0.15203768695500058,\n  'overrated': 0.15203768695500058,\n  'soundtrack': 0.14662824621157428,\n  'mystery': 0.1424323579943332,\n  'artificial intelligence': 0.1424323579943332,\n  'cult film': 0.1390040676969201,\n  'future': 0.1390040676969201,\n  'robots': 0.13137988918226592},\n 926: {'Hollywood': 0.9586683655604169, 'Drama': 
0.28452585976272676},\n 927: {'divorce': 0.9244542263279282, 'Comedy': 0.38129304140573},\n 928: {'Mrs. DeWinter': 0.7994407918636769,\n  'Mystery': 0.37507820382785373,\n  'Thriller': 0.30382866668747444,\n  'Romance': 0.2902850721418675,\n  'Drama': 0.20888628367643194},\n 929: {'Europe': 0.5293791839644856,\n  'war': 0.5293791839644856,\n  'journalism': 0.4176710136890362,\n  'Film-Noir': 0.35093379056178725,\n  'Mystery': 0.26864710610589776,\n  'Thriller': 0.2176151299238519,\n  'Drama': 0.14961332074802314},\n 930: {'assassination': 0.6985074719883685,\n  'Film-Noir': 0.5431939705366967,\n  'Thriller': 0.33683626271202305,\n  'Romance': 0.32182130767118833},\n 931: {'amnesia': 0.7603354944112808,\n  'Mystery': 0.4325268839856453,\n  'Thriller': 0.35036444433908104,\n  'Romance': 0.33474645137923176},\n 932: {},\n 933: {},\n 934: {'wedding': 0.9296258483367599, 'Comedy': 0.36850479251179297},\n 935: {},\n 936: {'Russia': 0.7055699848308934,\n  'Cold War': 0.6211698507791709,\n  'Romance': 0.25619962858074696,\n  'Comedy': 0.225123884347714},\n 937: {},\n 938: {'prostitution': 0.8365023688703782, 'Musical': 0.5479633079634492},\n 940: {'swashbuckler': 0.7698458060942437,\n  'Adventure': 0.3866427830450392,\n  'Action': 0.3781122747477138,\n  'Romance': 0.33893347554262065},\n 941: {'swashbuckler': 0.8936271991295439, 'Adventure': 0.4488100143444735},\n 942: {},\n 943: {'ghosts': 0.757714442265293,\n  'Fantasy': 0.4768085795307684,\n  'Romance': 0.36165631672027787,\n  'Drama': 0.26024432951510945},\n 944: {'Shangri-La': 0.9675177246180529, 'Drama': 0.2528031893586504},\n 945: {'Astaire and Rogers': 0.751212504939367,\n  'Musical': 0.4818315623771045,\n  'Romance': 0.33888682149899885,\n  'Comedy': 0.29778153088172826},\n 946: {},\n 947: {'butler': 0.6113667776090999,\n  'homeless': 0.5190850156515235,\n  'screwball': 0.5190850156515235,\n  'Romance': 0.22199348713451875,\n  'Comedy': 0.19506677820130439},\n 948: {'oil': 0.7851900546453289,\n  'Western': 
0.5099969171872053,\n  'Romance': 0.2851104847007398,\n  'Drama': 0.20516270143308585},\n 949: {},\n 950: {'Nick and Nora Charles': 0.8366581623702138,\n  'Crime': 0.4359012240221272,\n  'Comedy': 0.3316522911654738},\n 951: {'Screwball': 0.8765658673280525,\n  'Romance': 0.36153694412938475,\n  'Comedy': 0.31768430597844327},\n 952: {'race': 0.8285051003764471,\n  'Adventure': 0.4436301141375254,\n  'Comedy': 0.34171862764640115},\n 953: {'Christmas': 0.6826255712386285,\n  'Children': 0.446926081423213,\n  'Fantasy': 0.42243602640685124,\n  'Romance': 0.32041507623583604,\n  'Drama': 0.23056753836826435},\n 954: {'Politics': 0.9675177246180529, 'Drama': 0.2528031893586504},\n 955: {'leopard': 0.7152809200839184,\n  'screwball': 0.6073140072298728,\n  'Romance': 0.2597257678135447,\n  'Comedy': 0.22822232038072368},\n 956: {'adoption': 0.9001603041299965,\n  'Romance': 0.3535395778664867,\n  'Drama': 0.2544036040440045},\n 959: {},\n 961: {},\n 963: {},\n 965: {'fugitive': 0.8147631253352019,\n  'Mystery': 0.41347253993610583,\n  'Thriller': 0.33492964730717184,\n  'Drama': 0.23026862501759268},\n 968: {'zombies': 0.7279233020822816,\n  'Horror': 0.44910109155226347,\n  'Sci-Fi': 0.39487134317268374,\n  'Thriller': 0.3354288273139228},\n 969: {'missionary': 0.7708290687086942,\n  'War': 0.4062539437153497,\n  'Adventure': 0.3192948585779432,\n  'Romance': 0.27989586483011003,\n  'Comedy': 0.2459458846699994},\n 970: {'crime': 0.6445824175472585,\n  'Crime': 0.3978673994157477,\n  'Adventure': 0.3929937412769593,\n  'Romance': 0.34450076514678213,\n  'Comedy': 0.3027145310093995,\n  'Drama': 0.2478993632853078},\n 971: {'Tennessee Williams': 0.9586683655604169, 'Drama': 0.28452585976272676},\n 973: {'journalism': 0.8705052344183988,\n  'Comedy': 0.38077214484325994,\n  'Drama': 0.3118224022767317},\n 976: {'Hemingway': 0.8222114999155535,\n  'War': 0.46870850728931523,\n  'Romance': 0.3229250448652716},\n 979: {},\n 981: {},\n 982: {},\n 984: {},\n 986: {'Animal 
movie': 0.7694086731234868,\n  'Children': 0.49445135784165883,\n  'Adventure': 0.40438613780753585},\n 987: {},\n 988: {},\n 990: {},\n 991: {'Ireland': 0.9533034808868223, 'Drama': 0.30201402836469093},\n 993: {},\n 994: {'food': 0.9055676797451655,\n  'Comedy': 0.3281951198182884,\n  'Drama': 0.26876595902088607},\n 996: {},\n 998: {},\n 999: {},\n 1003: {},\n 1004: {},\n 1005: {},\n 1006: {'John Grisham': 0.7004332489706262,\n  'death penalty': 0.6803897737722728,\n  'Drama': 0.2155528229519815},\n 1007: {},\n 1008: {},\n 1009: {},\n 1010: {'race': 0.6545214559877784,\n  'Disney': 0.5613283733928476,\n  'Children': 0.4285258593539785,\n  'Comedy': 0.2699587167340826},\n 1011: {},\n 1012: {},\n 1013: {'twins': 0.7545111823874084,\n  'Children': 0.47476676523669853,\n  'Romance': 0.3403749201504016,\n  'Comedy': 0.2990891305474741},\n 1014: {},\n 1015: {},\n 1016: {},\n 1017: {},\n 1018: {},\n 1019: {},\n 1020: {},\n 1021: {},\n 1022: {'Disney': 0.5262308110717149,\n  'Musical': 0.40950020117688374,\n  'Animation': 0.40849008208639853,\n  'Children': 0.4017318938824216,\n  'Fantasy': 0.37971832924175947,\n  'Romance': 0.2880139708893195},\n 1023: {},\n 1024: {},\n 1025: {'King Arthur': 0.5752505332987029,\n  'Disney': 0.449491505295913,\n  'Musical': 0.34978351319092943,\n  'Animation': 0.3489206979757092,\n  'Children': 0.34314804437017055,\n  'Fantasy': 0.3243446788144589},\n 1027: {},\n 1028: {'nanny': 0.6602147457500096,\n  'Disney': 0.43801233002408124,\n  'Musical': 0.3408506941992281,\n  'Children': 0.3343846828847999,\n  'Fantasy': 0.3160615202392016,\n  'Comedy': 0.21065253803632716},\n 1029: {'Disney': 0.5826132242943247,\n  'Musical': 0.45337564341956604,\n  'Animation': 0.4522572962459555,\n  'Children': 0.44477500950586896,\n  'Drama': 0.22945780819710107},\n 1030: {'Disney': 0.5607056861280519,\n  'Musical': 0.4363277224357901,\n  'Animation': 0.4352514275747042,\n  'Children': 0.42805049126659245,\n  'Adventure': 0.3500803106406835},\n 1031: {},\n 
1032: {'Disney': 0.5197745589823306,\n  'Musical': 0.40447610058485045,\n  'Animation': 0.40347837450395585,\n  'Children': 0.39680310156414506,\n  'Fantasy': 0.3750596182636778,\n  'Adventure': 0.32452469017783875},\n 1033: {'Disney': 0.6536524746063475,\n  'Animation': 0.5074019753121908,\n  'Children': 0.499007357683496,\n  'Drama': 0.2574360791886419},\n 1034: {},\n 1035: {'Rogers and Hammerstein': 0.8258869797069524,\n  'Musical': 0.46118945193347277,\n  'Romance': 0.32436859616157526},\n 1036: {},\n 1037: {},\n 1040: {},\n 1041: {'In Netflix queue': 0.873652990606142, 'Drama': 0.4865495370514129},\n 1042: {'Music': 0.9244701906066038,\n  'Comedy': 0.2949676499106055,\n  'Drama': 0.2415552777026491},\n 1043: {},\n 1046: {},\n 1047: {},\n 1049: {},\n 1050: {},\n 1051: {},\n 1053: {},\n 1054: {},\n 1055: {},\n 1056: {},\n 1057: {},\n 1059: {'Amazing Cinematography': 0.481629626047732,\n  'shakespeare': 0.481629626047732,\n  'updated classics': 0.481629626047732,\n  'Leonardo DiCaprio': 0.36640475572830594,\n  'Shakespeare': 0.3513183918520803,\n  'Romance': 0.17488460954938032,\n  'Drama': 0.1258452454734584},\n 1060: {},\n 1061: {},\n 1064: {},\n 1066: {'Astaire and Rogers': 0.751212504939367,\n  'Musical': 0.4818315623771045,\n  'Romance': 0.33888682149899885,\n  'Comedy': 0.29778153088172826},\n 1068: {'anti-Semitism': 0.7643758966994297,\n  'Film-Noir': 0.5321269522581711,\n  'Crime': 0.3641021769028206},\n 1073: {},\n 1076: {'governess': 0.824327552080362,\n  'Horror': 0.41945558928702786,\n  'Thriller': 0.31328691706919337,\n  'Drama': 0.21538895767970737},\n 1077: {},\n 1078: {},\n 1079: {'fish': 0.8687938074640902,\n  'Crime': 0.3940787917547715,\n  'Comedy': 0.299831996293172},\n 1080: {'parody': 0.6142384469156611,\n  'Bible': 0.5823001015364795,\n  'religion': 0.4799560332982429,\n  'Comedy': 0.23082445317136224},\n 1081: {'cross dressing': 0.759118471136939,\n  'Musical': 0.4751829401576026,\n  'Romance': 0.33421064287716085,\n  'Comedy': 
0.2936725495335366},\n 1082: {'politics': 0.9343842187988722, 'Drama': 0.35626693876870086},\n 1083: {},\n 1084: {'1920s': 0.6753678687554147,\n  'gangsters': 0.6431176806512837,\n  'Crime': 0.3063421394381078,\n  'Drama': 0.19087269131797102},\n 1085: {},\n 1086: {},\n 1088: {'dance': 0.665580596812765,\n  'music': 0.5387759477352106,\n  'Musical': 0.3903097000622781,\n  'Romance': 0.2745167066303795,\n  'Drama': 0.1975395228973017},\n 1089: {'Quentin Tarantino': 0.5391422799992913,\n  'Tarantino': 0.302180148494328,\n  'neo-noir': 0.302180148494328,\n  'ensemble cast': 0.28775043236312486,\n  'nonlinear': 0.28775043236312486,\n  'humorous': 0.26957113999964566,\n  'violence': 0.2575967462395298,\n  'stylized': 0.24865295357463374,\n  'heist': 0.23556634245690983,\n  'religion': 0.2168437103966655,\n  'Mystery': 0.15334910188137996,\n  'Crime': 0.1370668008770818,\n  'Thriller': 0.12421903668848003},\n 1090: {'Vietnam': 0.7865610984208721,\n  'War': 0.5532518932843257,\n  'Drama': 0.2742881350482742},\n 1091: {},\n 1092: {},\n 1093: {'1960s': 0.6230382881793752,\n  'Jim Morrison': 0.6230382881793752,\n  'music': 0.4440097985852592,\n  'Drama': 0.1627939854088758},\n 1094: {},\n 1095: {},\n 1096: {'Holocaust': 0.9427905719458024, 'Drama': 0.33338556874901887},\n 1097: {'aliens': 0.7012329015926825,\n  'Children': 0.498362806210317,\n  'Sci-Fi': 0.4402325429439089,\n  'Drama': 0.2571035574301236},\n 1099: {},\n 1100: {},\n 1101: {'Navy': 0.7222369659990437,\n  'predictable': 0.5692124260477891,\n  'Action': 0.29256638293487625,\n  'Romance': 0.2622515787439414},\n 1103: {'1950s': 0.7627938111515745,\n  'adolescence': 0.6096477809200426,\n  'Drama': 0.21558104018703492},\n 1104: {'Tennessee Williams': 0.9586683655604169,\n  'Drama': 0.28452585976272676},\n 1105: {},\n 1107: {},\n 1111: {},\n 1112: {},\n 1114: {},\n 1116: {},\n 1117: {},\n 1119: {},\n 1120: {'freedom of expression': 0.9244701906066038,\n  'Comedy': 0.2949676499106055,\n  'Drama': 
0.2415552777026491},\n 1121: {},\n 1123: {},\n 1124: {'aging': 0.9675177246180529, 'Drama': 0.2528031893586504},\n 1125: {'Clousseau': 0.8580462025922904,\n  'Crime': 0.4087210122611976,\n  'Comedy': 0.3109724237824506},\n 1126: {},\n 1127: {},\n 1128: {},\n 1129: {},\n 1130: {},\n 1131: {},\n 1132: {},\n 1135: {'military': 0.9221778952547324, 'Comedy': 0.38676598803869994},\n 1136: {'Monty Python': 0.5300017976553429,\n  'british comedy': 0.46660309344248446,\n  'King Arthur': 0.4500015400038059,\n  'England': 0.3971242459099087,\n  'Fantasy': 0.2537252840455131,\n  'Adventure': 0.21953874846976135,\n  'Comedy': 0.16910592282073741},\n 1137: {},\n 1140: {},\n 1144: {},\n 1147: {'boxing': 0.7182597416492331,\n  'Documentary': 0.5254610688615756,\n  'In Netflix queue': 0.4560631629904211},\n 1148: {'Aardman': 0.6886061597272478,\n  'Animation': 0.4176770432608059,\n  'Children': 0.41076686308600274,\n  'Crime': 0.3401111020446171,\n  'Comedy': 0.25877107020508305},\n 1150: {},\n 1151: {},\n 1156: {},\n 1161: {},\n 1162: {},\n 1163: {},\n 1167: {},\n 1170: {},\n 1171: {'politics': 0.906555245893959, 'Comedy': 0.4220871783673886},\n 1172: {},\n 1173: {},\n 1175: {},\n 1176: {},\n 1177: {'Italy': 0.9001603041299965,\n  'Romance': 0.3535395778664867,\n  'Drama': 0.2544036040440045},\n 1178: {'military': 0.6302850880437375,\n  'court': 0.6043337049480509,\n  'War': 0.4366451390292844,\n  'Drama': 0.21647748939683562},\n 1179: {'crime': 0.6533466788169401,\n  'Film-Noir': 0.5893802369932474,\n  'Crime': 0.4032771247576785,\n  'Drama': 0.25127000252285864},\n 1180: {},\n 1183: {'adultery': 0.7302442964595297,\n  'War': 0.5208535476029239,\n  'Romance': 0.35885129587393844,\n  'Drama': 0.2582258640945369},\n 1184: {},\n 1185: {'In Netflix queue': 0.873652990606142, 'Drama': 0.4865495370514129},\n 1186: {},\n 1187: {'disability': 0.9491942361459574, 'Drama': 0.31469080391281296},\n 1188: {'dance': 0.7025851959885191,\n  'Australia': 0.5979677804660729,\n  'Romance': 
0.2897791417803076,\n  'Comedy': 0.2546303691457896},\n 1189: {'police': 0.7402657582390453,\n  'Documentary': 0.507743152092847,\n  'In Netflix queue': 0.4406852603407463},\n 1190: {},\n 1191: {},\n 1192: {},\n 1193: {'jack nicholson': 0.6638260484461063,\n  'emotional': 0.5174895671977203,\n  'mental illness': 0.5063020206821397,\n  'Drama': 0.18761073822978827},\n 1194: {},\n 1196: {'George Lucas': 0.35923769186682347,\n  'Harrison Ford': 0.35923769186682347,\n  'I am your father': 0.35923769186682347,\n  'original plot': 0.33212541787584143,\n  'space opera': 0.2962849411708784,\n  'sequel': 0.27790086989387736,\n  'classic': 0.26917266717989635,\n  'space': 0.2620412062998754,\n  'music': 0.25601164203908366,\n  'sci-fi': 0.24841727938688635,\n  'Sci-Fi': 0.16072357121596823,\n  'Adventure': 0.1488043882577445,\n  'Action': 0.14552131373941515},\n 1197: {'Inigo Montoya': 0.597774054968028,\n  'six-fingered man': 0.597774054968028,\n  'Fantasy': 0.2861695801085404,\n  'Adventure': 0.24761155240593405,\n  'Action': 0.24214849323364795,\n  'Romance': 0.21705783147042698,\n  'Comedy': 0.19072979308911509},\n 1198: {'indiana jones': 0.6101677273238895,\n  'archaeology': 0.5371795913666692,\n  'ark of the covenant': 0.30508386366194473,\n  'Steven Spielberg': 0.2820586703453802,\n  'treasure hunt': 0.2820586703453802,\n  'adventure': 0.23600828371225113,\n  'Adventure': 0.12637264609849092,\n  'Action': 0.12358448360491463},\n 1199: {},\n 1200: {'space': 0.5280876365425674,\n  'SPACE TRAVEL': 0.3619831139035786,\n  'space craft': 0.3619831139035786,\n  'horror': 0.30734416266624026,\n  'action': 0.2640438182712837,\n  'aliens': 0.2579681739388058,\n  'sci-fi': 0.25031577246982367,\n  'suspense': 0.2480629494949963,\n  'Horror': 0.1841935768993643,\n  'Sci-Fi': 0.16195187783365436,\n  'Adventure': 0.14994160424575986,\n  'Action': 0.14663343930586564},\n 1201: {'spaghetti western': 0.7281099394292306,\n  'Western': 0.5115281093514619,\n  'Adventure': 
0.32622000090837094,\n  'Action': 0.3190226017933895},\n 1202: {},\n 1203: {'Motivational': 0.37705534693845555,\n  'earnest': 0.37705534693845555,\n  'great screenplay': 0.37705534693845555,\n  'confrontational': 0.34859834449298277,\n  'claustrophobic': 0.331952065179451,\n  'good dialogue': 0.32014134204751,\n  'gritty': 0.29716644105615225,\n  'court': 0.27503806028850536,\n  'thought-provoking': 0.2561720633002003,\n  'Drama': 0.09852097986980426},\n 1204: {'Middle East': 0.8117431095562415,\n  'War': 0.42781707764787197,\n  'Adventure': 0.3362423809490887,\n  'Drama': 0.21210076240901687},\n 1206: {'brainwashing': 0.7695069344589217,\n  'Sci-Fi': 0.37238312976071125,\n  'Crime': 0.3490429609110952,\n  'Thriller': 0.3163259088986343,\n  'Drama': 0.21747830532519444},\n 1207: {'Harper Lee': 0.779200744193106,\n  'racism': 0.5927850840120338,\n  'Drama': 0.20359775151449003},\n 1208: {'Vietnam': 0.7238354782654259,\n  'War': 0.5091319028880231,\n  'Action': 0.3913231266456377,\n  'Drama': 0.25241457251546534},\n 1209: {'spaghetti western': 0.7526210293810056,\n  'Western': 0.5287481894824978,\n  'Action': 0.32976217732416285,\n  'Drama': 0.21270600522524863},\n 1210: {'darth vader': 0.5405234265089442,\n  'luke skywalker': 0.5405234265089442,\n  'space opera': 0.48219420437297783,\n  'Sci-Fi': 0.2615724384783038,\n  'Adventure': 0.24217435189110212,\n  'Action': 0.23683125379436154},\n 1211: {},\n 1212: {'Venice': 0.5170744014454731,\n  'ferris wheel': 0.5170744014454731,\n  'zither': 0.5170744014454731,\n  'Film-Noir': 0.3169068171524086,\n  'Mystery': 0.24259875116880758,\n  'Thriller': 0.19651489837435207},\n 1213: {'Mafia': 0.8386419762344016,\n  'Crime': 0.462290799731766,\n  'Drama': 0.2880396711930868},\n 1214: {'aliens': 0.7247233084832948,\n  'Horror': 0.5174645244554288,\n  'Sci-Fi': 0.4549797710570704},\n 1215: {},\n 1216: {},\n 1217: {'samurai': 0.821991838822293,\n  'War': 0.5102351270504236,\n  'Drama': 0.25296152283180073},\n 1218: {},\n 1219: 
{'Alfred Hitchcock': 0.6110122351710541,\n  'Norman Bates': 0.347016112388875,\n  'suspenseful': 0.3208262215968462,\n  'black and white': 0.2862050691568315,\n  'remade': 0.27931622679349827,\n  'tense': 0.26399612278217915,\n  'imdb top 250': 0.25641397609427286,\n  'psychology': 0.23576335471933313,\n  'Horror': 0.1765776814651209,\n  'Crime': 0.14552452916206987},\n 1220: {'Saturday Night Live': 0.8078098989331027,\n  'Musical': 0.41705000775098544,\n  'Action': 0.32723057854415993,\n  'Comedy': 0.25774523601077043},\n 1221: {'Mafia': 0.8453689136333568,\n  'Al Pacino': 0.4582432231194173,\n  'Crime': 0.23299946951540845,\n  'Drama': 0.1451750513450036},\n 1222: {'anti-war': 0.7240539440760675,\n  'Vietnam': 0.5425252854072179,\n  'War': 0.3816018131442815,\n  'Drama': 0.18918841657645058},\n 1223: {'moon': 0.6797176485758257,\n  'Animation': 0.37862971087814523,\n  'Children': 0.3723655420330601,\n  'Sci-Fi': 0.32893190950674817,\n  'Adventure': 0.3045384768537329,\n  'Comedy': 0.23457936478974528},\n 1224: {'Shakespeare': 0.6731980033405864,\n  'War': 0.48640119416830335,\n  'Action': 0.37385210988826045,\n  'Romance': 0.33511473550525345,\n  'Drama': 0.24114526864368746},\n 1225: {'Mozart': 0.695338213751387,\n  'Salieri': 0.695338213751387,\n  'Drama': 0.1816852690618064},\n 1226: {},\n 1227: {},\n 1228: {'boxing': 0.9427905719458024, 'Drama': 0.33338556874901887},\n 1230: {'New York': 0.8572903009918638,\n  'Romance': 0.3867406137341949,\n  'Comedy': 0.33983089546681566},\n 1231: {'NASA': 0.7506760870124646,\n  'space': 0.6219708100483131,\n  'Drama': 0.2227952509267193},\n 1232: {},\n 1233: {'submarine': 0.7913567995548353,\n  'War': 0.45111952860145127,\n  'Action': 0.3467343205598576,\n  'Drama': 0.22365352145366854},\n 1234: {'The Entertainer': 0.8846904300816097,\n  'Crime': 0.37100340213465777,\n  'Comedy': 0.28227525312454477},\n 1235: {'May-December romance': 0.8764092888721928,\n  'Romance': 0.3182331152416974,\n  'Comedy': 0.2796330167539845,\n  
'Drama': 0.22899742069108042},\n 1236: {},\n 1237: {'chess': 0.4263225810635472,\n  'reflective': 0.4263225810635472,\n  'existentialism': 0.3803170550994978,\n  'cerebral': 0.3711629746061439,\n  'death': 0.3711629746061439,\n  'philosophical': 0.34551521819022973,\n  'atmospheric': 0.28711523342647494,\n  'Drama': 0.1204874294170672},\n 1238: {},\n 1240: {'robots': 0.5880371155493509,\n  'special effects': 0.3864798308925736,\n  'Sci-Fi': 0.34582350365966447,\n  'artificial intelligence': 0.31875317248777907,\n  'Action': 0.3131133174417789,\n  'tense': 0.29401855777467545,\n  'time travel': 0.27854435378201575,\n  'Thriller': 0.14688223682950954},\n 1241: {},\n 1242: {'Civil War': 0.8141366071304736,\n  'War': 0.520246428508831,\n  'Drama': 0.25792486997939273},\n 1243: {'Shakespeare sort of': 0.9055676797451655,\n  'Comedy': 0.3281951198182884,\n  'Drama': 0.26876595902088607},\n 1244: {'black and white': 0.832219238252512,\n  'Romance': 0.3663940968457076,\n  'Comedy': 0.32195231016106457,\n  'Drama': 0.2636535895090006},\n 1245: {'Mafia': 0.6564727404408262,\n  'Film-Noir': 0.5288680513151305,\n  'Crime': 0.361872308780869,\n  'Thriller': 0.3279527158535428,\n  'Drama': 0.22547189105992144},\n 1246: {'highschool': 0.7365411852286794,\n  'High School': 0.6484362826614107,\n  'Drama': 0.19245121405224577},\n 1247: {'Simon and Garfunkel': 0.8764092888721928,\n  'Romance': 0.3182331152416974,\n  'Comedy': 0.2796330167539845,\n  'Drama': 0.22899742069108042},\n 1248: {},\n 1249: {'hit men': 0.7342570689986396,\n  'Crime': 0.36265865094801614,\n  'Action': 0.35031294698391646,\n  'Thriller': 0.32866535134138886,\n  'Romance': 0.3140146262854285},\n 1250: {'POW': 0.7891950303819898,\n  'War': 0.4498871940960199,\n  'Adventure': 0.3535883656936643,\n  'Drama': 0.2230425615322414},\n 1251: {},\n 1252: {'incest': 0.7224304614430147,\n  'Film-Noir': 0.44276633596605197,\n  'Mystery': 0.33894682711509644,\n  'Crime': 0.3029581307625819,\n  'Thriller': 
0.2745607756178609},\n 1253: {'aliens': 0.7426874098601339,\n  'Sci-Fi': 0.4662575961746113,\n  'Thriller': 0.3960688498042356,\n  'Drama': 0.27230264680951655},\n 1254: {'Cold': 0.7400664465334397,\n  'Western': 0.4806882155635939,\n  'Adventure': 0.30655228373785687,\n  'Action': 0.29978881390300105,\n  'Drama': 0.1933723313387718},\n 1255: {},\n 1256: {},\n 1257: {'skiing': 0.9003338334747673,\n  'Romance': 0.3269203604093219,\n  'Comedy': 0.28726654217027975},\n 1258: {'Horror': 0.4153613572394258,\n  'Stanley Kubrick': 0.3773376503043087,\n  'jack nicholson': 0.3773376503043087,\n  'masterpiece': 0.3593190107390145,\n  'psychological': 0.3058151417901276,\n  'Stephen King': 0.2977128751556895,\n  'disturbing': 0.2977128751556895,\n  'suspense': 0.27969423559039536,\n  'atmospheric': 0.2541253791376587},\n 1259: {'Stephen King': 0.8302299325354088,\n  'Adventure': 0.4714596569320508,\n  'Drama': 0.2973954455060866},\n 1260: {'chilly': 0.41673814025511524,\n  'menacing': 0.41673814025511524,\n  'oninous': 0.41673814025511524,\n  'serial killer': 0.3223823510640609,\n  'creepy': 0.3170378818592674,\n  'mental illness': 0.2938588917778651,\n  'atmospheric': 0.25947849160335795,\n  'Film-Noir': 0.255412291239077,\n  'Crime': 0.17476312908647101,\n  'Thriller': 0.15838195248500717},\n 1261: {},\n 1262: {'POW': 0.7458628202562406,\n  'War': 0.4251853071391081,\n  'Adventure': 0.33417394369351305,\n  'Action': 0.32680105656245967,\n  'Drama': 0.21079599791840012},\n 1263: {'Vietnam': 0.7865610984208721,\n  'War': 0.5532518932843257,\n  'Drama': 0.2742881350482742},\n 1264: {},\n 1265: {},\n 1266: {'revenge': 0.7414420627917475,\n  'Western': 0.6225324184394107,\n  'Drama': 0.25043373477847736},\n 1267: {'brainwashing': 0.6419778438268456,\n  'assassination': 0.5472609784317731,\n  'War': 0.36596481188088276,\n  'Crime': 0.2911966577743748,\n  'Thriller': 0.26390174779140096},\n 1268: {},\n 1269: {'murder': 0.7496758777380994,\n  'Mystery': 0.45467497432818266,\n  
'Thriller': 0.36830530224490404,\n  'Comedy': 0.3092053531159616},\n 1270: {'time travel': 0.7232603831311846,\n  'Sci-Fi': 0.44897775947811464,\n  'Adventure': 0.4156817841653079,\n  'Comedy': 0.32019063696506},\n 1271: {},\n 1272: {'World War II': 0.7910886439912452,\n  'War': 0.5480456661479436,\n  'Drama': 0.27170702082307246},\n 1273: {},\n 1274: {'visually stunning': 0.5250026069857492,\n  'animation': 0.46331233532709354,\n  'anime': 0.4510373379091421,\n  'Animation': 0.3184426010317401,\n  'Sci-Fi': 0.27664477936169235,\n  'Adventure': 0.2561289352032852,\n  'Action': 0.25047795682544177},\n 1275: {},\n 1276: {'prison': 0.9557656219396208, 'Drama': 0.2941293523577166},\n 1277: {'In Netflix queue': 0.6493502433442517,\n  'Romance': 0.5025530151067855,\n  'Comedy': 0.4415958269659919,\n  'Drama': 0.36163220830294135},\n 1278: {'spoof': 0.802422563691213,\n  'Fantasy': 0.4965712003578291,\n  'Comedy': 0.33096083190372516},\n 1279: {},\n 1280: {'In Netflix queue': 0.873652990606142, 'Drama': 0.4865495370514129},\n 1281: {'Nazis': 0.8100569262356991,\n  'War': 0.46177969142282876,\n  'Comedy': 0.27956113777249664,\n  'Drama': 0.2289385574654367},\n 1282: {'Disney': 0.5495158832918604,\n  'Musical': 0.42762008613601127,\n  'Animation': 0.42656527050652066,\n  'Children': 0.41950804070882475,\n  'Fantasy': 0.3965204026545654},\n 1283: {'gunfight': 0.8191908501469034,\n  'Western': 0.532081125698415,\n  'Drama': 0.2140467862666971},\n 1284: {'Hammett': 0.7513032339654736,\n  'Film-Noir': 0.46046200687313876,\n  'Mystery': 0.35249323076058464,\n  'Crime': 0.3150662043560808},\n 1285: {'high school': 0.9161858918209185, 'Comedy': 0.4007535547294725},\n 1286: {},\n 1287: {},\n 1288: {'music': 0.7209582739690491,\n  'heavy metal': 0.5058273604475201,\n  'mockumentary': 0.44532039735861856,\n  'Comedy': 0.16139266499635682},\n 1289: {},\n 1290: {},\n 1291: {'Holy Grail': 0.6883072993528686,\n  'archaeology': 0.6059721241284228,\n  'Adventure': 0.2851124726954156,\n  
'Action': 0.2788220298870835},\n 1292: {'television': 0.8899837614061251,\n  'Comedy': 0.35279062207941475,\n  'Drama': 0.2889077385710268},\n 1293: {'India': 0.9442351365080458, 'Drama': 0.3292719347038736},\n 1295: {},\n 1296: {'E. M. Forster': 0.9128246380527296,\n  'Romance': 0.3314558984315574,\n  'Drama': 0.2385124054610833},\n 1297: {},\n 1298: {},\n 1299: {'Cambodia': 0.6964494444374454,\n  'Vietnam': 0.5644408540428675,\n  'War': 0.3970168010764561,\n  'Drama': 0.1968307732371894},\n 1300: {},\n 1301: {'Shakespeare sort of': 0.7013727281965574,\n  'space': 0.5811206343848339,\n  'Sci-Fi': 0.356431665784405,\n  'Drama': 0.20816236946297592},\n ...}"
     },
     "execution_count": 69,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "movie_profile"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}